def nanmean(self, a: BlockArray, axis=None, keepdims=False, dtype=None):
    if not array_utils.is_float(a):
        a = a.astype(np.float64)
    # Count the non-NaN entries along the reduced axes.
    num_summed = self.sum(
        ~a.ufunc("isnan"), axis=axis, dtype=a.dtype, keepdims=keepdims
    )
    if num_summed.ndim == 0 and num_summed == 0:
        # Fully reduced and every entry is NaN.
        return self.scalar(np.nan)
    if num_summed.ndim > 0:
        # Replace zero counts (all-NaN slices) with NaN so the division
        # below yields NaN instead of dividing by zero.
        num_summed = self.where(
            num_summed == 0,
            self.empty(num_summed.shape, num_summed.block_shape) * np.nan,
            num_summed,
        )
    res = (
        self.reduce("nansum", a, axis=axis, dtype=dtype, keepdims=keepdims)
        / num_summed
    )
    if dtype is not None:
        res = res.astype(dtype)
    return res
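# For reference, a minimal NumPy sketch of the same nanmean logic on plain
# ndarrays (an illustration only; self.sum, self.where, and self.reduce above
# are the distributed BlockArray counterparts of these calls).
import numpy as np

def nanmean_reference(a, axis=None, keepdims=False, dtype=None):
    if not np.issubdtype(a.dtype, np.floating):
        a = a.astype(np.float64)
    # Count non-NaN entries along the reduced axes.
    num_summed = np.sum(~np.isnan(a), axis=axis, keepdims=keepdims)
    # Zero counts mark all-NaN slices; dividing by NaN instead of 0
    # produces NaN rather than a divide-by-zero warning.
    num_summed = np.where(num_summed == 0, np.nan, num_summed)
    res = np.nansum(a, axis=axis, keepdims=keepdims) / num_summed
    return res.astype(dtype) if dtype is not None else res

# The middle column is all-NaN, so its mean is NaN.
x = np.array([[1.0, np.nan, 2.0],
              [3.0, np.nan, 4.0]])
print(nanmean_reference(x, axis=0))  # [2. nan 3.]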
def compute_block_shape_static(
    shape: tuple, dtype: Union[type, np.dtype], cluster_shape: tuple, num_cores: int
):
    # TODO (hme): This should also compute parameters for DeviceGrid.
    if array_utils.is_float(dtype, type_test=True):
        dtype = np.finfo(dtype).dtype
    elif array_utils.is_int(dtype, type_test=True) or array_utils.is_uint(
        dtype, type_test=True
    ):
        dtype = np.iinfo(dtype).dtype
    elif array_utils.is_complex(dtype, type_test=True):
        dtype = np.dtype(dtype)
    elif dtype in (bool, np.bool_):
        dtype = np.dtype(np.bool_)
    else:
        raise ValueError("dtype %s not supported" % str(dtype))

    nbytes = dtype.alignment
    size = np.prod(shape) * nbytes
    # If the object is less than 100 megabytes, there's not much value in
    # constructing a block tensor.
    if size < 10**8:
        block_shape = shape
        return block_shape

    # Align the cluster shape with the array's number of axes:
    # truncate extra cluster axes, or pad missing ones with 1.
    if len(shape) < len(cluster_shape):
        cluster_shape = cluster_shape[: len(shape)]
    elif len(shape) > len(cluster_shape):
        cluster_shape = list(cluster_shape)
        for axis in range(len(shape)):
            if axis >= len(cluster_shape):
                cluster_shape.append(1)
        cluster_shape = tuple(cluster_shape)

    shape_np = np.array(shape, dtype=int)
    # Softmax on cluster shape gives strong preference to larger dimensions.
    cluster_weights = np.exp(np.array(cluster_shape)) / np.sum(np.exp(cluster_shape))
    shape_fracs = np.array(shape) / np.sum(shape)
    # cluster_weights weights the proportion of cores available along each axis,
    # and shape_fracs is the proportion of data along each axis.
    weighted_shape_fracs = cluster_weights * shape_fracs
    weighted_shape_fracs = weighted_shape_fracs / np.sum(weighted_shape_fracs)

    # Compute the dimensions of the grid shape
    # so that the number of blocks is close to the number of cores.
    grid_shape_frac = num_cores**weighted_shape_fracs
    grid_shape = np.floor(grid_shape_frac)
    # Put the remainder on the largest axis.
    remaining = np.sum(grid_shape_frac - grid_shape)
    grid_shape[np.argmax(shape)] += remaining
    grid_shape = np.ceil(grid_shape).astype(int)

    # We use the ceiling of the floating-point block shape
    # so that the resulting grid shape is <= what we compute above.
    block_shape = tuple((shape_np + grid_shape - 1) // grid_shape)
    return block_shape
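# A hedged walk-through of the grid heuristic with concrete numbers. Because the
# normalized weights sum to 1, the per-axis factors num_cores ** w_i multiply to
# exactly num_cores before rounding. The shapes below are illustrative only.
import numpy as np

shape = (10**6, 128)      # ~1GB of float64, well above the 100MB cutoff
cluster_shape = (4, 1)    # 4 nodes along axis 0, 1 along axis 1
num_cores = 64

cluster_weights = np.exp(np.array(cluster_shape)) / np.sum(np.exp(cluster_shape))
shape_fracs = np.array(shape) / np.sum(shape)
weighted = cluster_weights * shape_fracs
weighted = weighted / np.sum(weighted)

grid_shape_frac = num_cores**weighted
grid_shape = np.floor(grid_shape_frac)
grid_shape[np.argmax(shape)] += np.sum(grid_shape_frac - grid_shape)
grid_shape = np.ceil(grid_shape).astype(int)

block_shape = tuple((np.array(shape) + grid_shape - 1) // grid_shape)
print(grid_shape, block_shape)  # [64 1] (15625, 128): the grid lands on the long axis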
def nbytes(self):
    if array_utils.is_float(self.dtype, type_test=True):
        dtype = np.finfo(self.dtype).dtype
    elif array_utils.is_int(self.dtype, type_test=True) or array_utils.is_uint(
        self.dtype, type_test=True
    ):
        dtype = np.iinfo(self.dtype).dtype
    elif array_utils.is_complex(self.dtype, type_test=True):
        dtype = np.dtype(self.dtype)
    elif self.dtype in (bool, np.bool_):
        dtype = np.dtype(np.bool_)
    else:
        raise ValueError("dtype %s not supported" % str(self.dtype))
    dtype_nbytes = dtype.alignment
    nbytes = np.prod(self.shape) * dtype_nbytes
    return nbytes
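# A minimal standalone sketch of the size estimate above (estimate_nbytes is a
# hypothetical helper, not part of the library). Note that dtype.alignment
# matches itemsize for the common real dtypes, but can undercount complex types
# (e.g. complex128 has itemsize 16 but alignment 8 on many platforms), so
# itemsize is the safer field when an exact byte count is needed.
import numpy as np

def estimate_nbytes(shape, dtype):
    dtype = np.dtype(dtype)  # normalizes Python types like int, float, bool
    return int(np.prod(shape)) * dtype.alignment

print(estimate_nbytes((1000, 1000), np.float64))  # 8000000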