예제 #1
0
    def getplan(self, size, dtype):
        """
        Identify the plan for chunking along each dimension.

        Builds an integer array with one entry per dimension of
        ``self.value.shape`` giving the number of chunks along that
        dimension. Dimensions not listed in ``self.value.axes`` keep a
        single chunk.

        Parameters
        ----------
        size : tuple or number
            If tuple, the explicit number of chunks for each axis in
            ``self.value.axes``. Otherwise the target chunk size in
            kilobytes (it is scaled by 1000 and compared against byte
            counts derived from ``dtype``).

        dtype : dtype-like
            Anything accepted by ``numpy.dtype``; its ``itemsize`` is used
            as the number of bytes per element.

        Returns
        -------
        ndarray of int
            Number of chunks along every dimension of ``self.value.shape``.
        """
        from numpy import dtype as gettype

        # start with a single chunk along every dimension
        plan = ones(len(self.value.shape), dtype=int)

        if isinstance(size, tuple):
            plan[self.value.axes] = size
            return plan

        # kilobytes -> bytes (bound to a new name so the argument is left alone)
        target = size * 1000.0

        element_size = gettype(dtype).itemsize
        total_size = prod(self.value.shape) * element_size
        # NOTE(review): assumes self.value.mask selects the same dimensions
        # that self.value.axes indexes -- confirm against the value object
        moving = self.value.shape[self.value.mask]

        if target <= element_size:
            # a chunk cannot hold less than one element, so every moving
            # axis is split completely; return the full-length plan like
            # every other path does, not just the moving-axis extents
            plan[self.value.axes] = moving
            return plan

        remaining = 1.0 * total_size
        nchunks = ones(len(moving))
        for i, extent in enumerate(moving):
            # bytes per chunk if this axis were split into single elements
            fully_split = remaining / extent
            if fully_split < target:
                # a partial split of this axis is enough to reach the
                # target; all later axes keep one chunk
                nchunks[i] = ceil(remaining / target)
                break
            nchunks[i] = extent
            remaining = fully_split

        plan[self.value.axes] = nchunks
        return plan
예제 #2
0
    def getplan(self, size, dtype):
        """
        Identify the plan for chunking along each dimension.

        The plan is an integer array with one entry per dimension of
        ``self.value.shape``; each entry is the number of chunks along
        that dimension. Only the dimensions in ``self.value.axes`` are ever
        split, all others keep one chunk.

        Parameters
        ----------
        size : tuple or number
            A tuple gives the number of chunks for each axis in
            ``self.value.axes`` directly. Any other value is taken as the
            target chunk size in kilobytes (multiplied by 1000 before it is
            compared with sizes in bytes).

        dtype : dtype-like
            Passed to ``numpy.dtype`` to obtain the bytes per element.

        Returns
        -------
        ndarray of int
            Number of chunks along every dimension of ``self.value.shape``.
        """
        from numpy import dtype as gettype

        # default: one chunk per dimension
        plan = ones(len(self.value.shape), dtype=int)

        if isinstance(size, tuple):
            plan[self.value.axes] = size
            return plan

        # convert from kilobytes to bytes without modifying the argument
        target_bytes = size * 1000.0

        # calculate from dtype
        element_size = gettype(dtype).itemsize
        total_size = prod(self.value.shape) * element_size
        # NOTE(review): presumably self.value.mask is the boolean
        # counterpart of self.value.axes -- verify against the value object
        moving_value_shapes = self.value.shape[self.value.mask]

        if target_bytes <= element_size:
            # target is at most one element: split the moving axes fully,
            # and report it through the full-length plan so the result has
            # the same layout as on every other path
            plan[self.value.axes] = moving_value_shapes
            return plan

        remaining_size = 1.0 * total_size
        nchunks = ones(len(moving_value_shapes))
        for i, s in enumerate(moving_value_shapes):
            # chunk size in bytes after splitting this axis completely
            min_chunk_size = remaining_size / s
            if min_chunk_size < target_bytes:
                # splitting this axis only partially already suffices
                nchunks[i] = ceil(remaining_size / target_bytes)
                break
            nchunks[i] = s
            remaining_size = min_chunk_size

        plan[self.value.axes] = nchunks
        return plan
예제 #3
0
파일: chunk.py 프로젝트: gdtm86/bolt
    def getplan(self, size="150", axes=None, padding=None):
        """
        Identify a plan for chunking values along each dimension.

        Generates an ndarray with the size (in number of elements) of chunks
        in each value dimension. If provided, will estimate chunks for only
        a subset of axes, leaving all others to the full size of the axis.

        Parameters
        ----------
        size : string, tuple or int
            If str, the target size (in KB) of a chunk; chunk extents along
            the selected axes are derived from it and the element size of
            ``self.dtype``. If tuple, the explicit chunk size (in elements)
            along each selected axis. An int is treated as a one-element
            tuple.

        axes : tuple, optional, default=None
            One or more value axes to chunk; all other axes are kept whole.
            If None, all value axes are used for a str ``size`` and the
            leading ``len(size)`` axes for an explicit ``size``.

        padding : tuple or int, optional, default=None
            Size of overlapping padding between chunks in each dimension.
            If tuple, specifies padding along each chunked dimension; if int,
            all chunked dimensions use the same padding; if None, no padding.

        Returns
        -------
        plan : ndarray
            Chunk size in elements along every value dimension.
        pad : ndarray
            Padding along every value dimension (zero where not chunked).

        Raises
        ------
        ValueError
            If ``size`` is not a str, tuple or int.
        """
        from numpy import dtype as gettype

        # validate up front so an unsupported type fails with the intended
        # ValueError instead of a TypeError from len() further down
        if isinstance(size, int):
            size = (size,)
        elif not isinstance(size, (str, tuple)):
            raise ValueError("Chunk size not understood, must be tuple or int")

        # initialize with all elements in one chunk; work on a copy so
        # assignments into the plan can never write through to self.vshape
        plan = array(self.vshape)

        # check for subset of axes
        if axes is None:
            naxes = len(self.vshape) if isinstance(size, str) else len(size)
            axes = arange(naxes)
        else:
            axes = asarray(axes, 'int')

        # set padding
        pad = array(len(self.vshape) * [0])
        if padding is not None:
            pad[axes] = padding

        # explicit specification: nothing to estimate
        if isinstance(size, tuple):
            plan[axes] = size
            return plan, pad

        # convert from kilobytes to bytes
        target = 1000.0 * float(size)

        # calculate from dtype
        elsize = gettype(self.dtype).itemsize
        # NOTE(review): presumably vmask(axes) yields a boolean mask over
        # the value axes -- confirm against its definition
        dims = self.vshape[self.vmask(axes)]

        if target <= elsize:
            # a chunk cannot hold less than one element
            plan[axes] = ones(len(axes))
            return plan, pad

        remsize = 1.0 * prod(self.vshape) * elsize
        s = []
        for i, d in enumerate(dims):
            # bytes per chunk with a single element along this axis
            minsize = remsize / d
            if minsize >= target:
                s.append(1)
                remsize = minsize
            else:
                # fit as many elements along this axis as the target allows
                # and keep the remaining *selected* axes whole
                s.append(min(d, floor(target / minsize)))
                s.extend(dims[i + 1:])
                break

        plan[axes] = s
        return plan, pad
예제 #4
0
    def getplan(self, size="150", axes=None, padding=None):
        """
        Identify a plan for chunking values along each dimension.

        Generates an ndarray with the size (in number of elements) of chunks
        in each value dimension. If provided, will estimate chunks for only
        a subset of axes, leaving all others to the full size of the axis.

        Parameters
        ----------
        size : string, tuple or int
            If str, the target size of a chunk in KB (the value is scaled by
            1000 and compared against byte counts, so the unit is kilobytes,
            not megabytes). If tuple, the explicit chunk size (in elements)
            along each selected axis. An int is treated as a one-element
            tuple.

        axes : tuple, optional, default=None
            One or more value axes to chunk; all other axes are kept whole.
            If None, all value axes are used for a str ``size`` and the
            leading ``len(size)`` axes for an explicit ``size``.

        padding : tuple or int, optional, default=None
            Size of overlapping padding between chunks in each dimension.
            If tuple, specifies padding along each chunked dimension; if int,
            all chunked dimensions use the same padding; if None, no padding.

        Returns
        -------
        plan : ndarray
            Chunk size in elements along every value dimension.
        pad : ndarray
            Padding along every value dimension (zero where not chunked).

        Raises
        ------
        ValueError
            If ``size`` is not a str, tuple or int.
        """
        from numpy import dtype as gettype

        # reject unsupported types before anything calls len() on them, and
        # accept the int form that the docstring and error message promise
        if isinstance(size, int):
            size = (size,)
        elif not isinstance(size, (str, tuple)):
            raise ValueError("Chunk size not understood, must be tuple or int")

        estimate = isinstance(size, str)

        # initialize with all elements in one chunk (copied, so writing
        # into the plan leaves self.vshape untouched)
        plan = array(self.vshape)

        # check for subset of axes
        if axes is not None:
            axes = asarray(axes, 'int')
        elif estimate:
            axes = arange(len(self.vshape))
        else:
            axes = arange(len(size))

        # set padding
        pad = array(len(self.vshape) * [0])
        if padding is not None:
            pad[axes] = padding

        if not estimate:
            # explicit chunk sizes
            plan[axes] = size
            return plan, pad

        # convert from kilobytes to bytes
        target = 1000.0 * float(size)

        # calculate from dtype
        elsize = gettype(self.dtype).itemsize
        # NOTE(review): vmask(axes) is assumed to return a boolean mask over
        # the value axes -- verify against its definition
        dims = self.vshape[self.vmask(axes)]

        if target <= elsize:
            # smallest possible chunk is a single element
            plan[axes] = ones(len(axes))
            return plan, pad

        remsize = 1.0 * prod(self.vshape) * elsize
        s = []
        for i, d in enumerate(dims):
            # bytes per chunk when this axis is cut down to one element
            minsize = remsize / d
            if minsize >= target:
                s.append(1)
                remsize = minsize
                continue
            # this axis can hold several elements per chunk; later selected
            # axes stay whole (taken from dims, which matches axes in
            # length, rather than from the full-shape plan)
            s.append(min(d, floor(target / minsize)))
            s.extend(dims[i + 1:])
            break

        plan[axes] = s
        return plan, pad