def _move_tuple_axes_first(array, axis): """ Bottleneck can only take integer axis, not tuple, so this function takes all the axes to be operated on and combines them into the first dimension of the array so that we can then use axis=0 """ # Figure out how many axes we are operating over naxis = len(axis) # Add remaining axes to the axis tuple axis += tuple(i for i in range(array.ndim) if i not in axis) # The new position of each axis is just in order destination = tuple(range(array.ndim)) # Reorder the array so that the axes being operated on are at the beginning array_new = np.moveaxis(array, axis, destination) # Figure out the size of the product of the dimensions being operated on first = np.prod(array_new.shape[:naxis]) # Collapse the dimensions being operated on into a single dimension so that # we can then use axis=0 with the bottleneck functions array_new = array_new.reshape((first,) + array_new.shape[naxis:]) return array_new
def _move_tuple_axes_first(array, axis): """ Bottleneck can only take integer axis, not tuple, so this function takes all the axes to be operated on and combines them into the first dimension of the array so that we can then use axis=0 """ # Figure out how many axes we are operating over naxis = len(axis) # Add remaining axes to the axis tuple axis += tuple(i for i in range(array.ndim) if i not in axis) # The new position of each axis is just in order destination = tuple(range(array.ndim)) # Reorder the array so that the axes being operated on are at the beginning array_new = np.moveaxis(array, axis, destination) # Figure out the size of the product of the dimensions being operated on first = np.prod(array_new.shape[:naxis]) # Collapse the dimensions being operated on into a single dimension so that # we can then use axis=0 with the bottleneck functions array_new = array_new.reshape((first, ) + array_new.shape[naxis:]) return array_new
def iterate_chunks(shape, chunk_shape=None, n_max=None):
    """
    Given a data shape and a chunk shape (or maximum chunk size), iteratively
    return slice objects that can be used to slice the array.

    This is a generator, so argument errors are raised when the first chunk
    is requested rather than when the function is called.

    Parameters
    ----------
    shape : sequence of int
        Shape of the data to split into chunks.
    chunk_shape : sequence of int, optional
        Shape of a single chunk. Must have the same length as ``shape``, fit
        within ``shape`` and contain only positive values.
    n_max : optional
        Passed to ``find_chunk_shape`` to derive ``chunk_shape`` when that
        is not given. Exactly one of ``chunk_shape`` and ``n_max`` must be
        specified.

    Yields
    ------
    slices : tuple of slice
        One slice per dimension. Chunks are produced with the first
        dimension advancing fastest; chunks at the upper edge are truncated
        to stay within ``shape``.

    Raises
    ------
    ValueError
        If neither or both of ``chunk_shape`` and ``n_max`` are given, or if
        ``chunk_shape`` has the wrong length, does not fit within ``shape``,
        or contains a non-positive value.
    """

    # Shortcut - if there are any 0 elements in the shape, there are no
    # chunks to iterate over. Each dimension is tested individually because a
    # fixed-width integer product can wrap around to 0 for very large shapes.
    if any(size == 0 for size in shape):
        return

    if chunk_shape is None and n_max is None:
        raise ValueError('Either chunk_shape or n_max should be specified')
    elif chunk_shape is not None and n_max is not None:
        raise ValueError(
            'Either chunk_shape or n_max should be specified (not both)')
    elif chunk_shape is None:
        chunk_shape = find_chunk_shape(shape, n_max)
    else:
        if len(chunk_shape) != len(shape):
            raise ValueError(
                'chunk_shape should have the same length as shape')
        elif any(x > y for (x, y) in zip(chunk_shape, shape)):
            raise ValueError('chunk_shape should fit within shape')
        elif any(x < 1 for x in chunk_shape):
            # A zero step would never advance and the loop below would
            # never terminate
            raise ValueError('chunk_shape should only contain positive values')

    ndim = len(chunk_shape)

    # A zero-dimensional shape has exactly one (empty) chunk
    if ndim == 0:
        yield ()
        return

    start_index = [0] * ndim
    shape = list(shape)

    # The loop is left through the ``break`` at the bottom
    while True:

        end_index = [
            min(start_index[i] + chunk_shape[i], shape[i])
            for i in range(ndim)
        ]

        yield tuple(slice(start_index[i], end_index[i]) for i in range(ndim))

        # Update chunk index. What we do is to increment the
        # counter for the first dimension, and then if it
        # exceeds the number of elements in that direction,
        # cycle back to zero and advance in the next dimension,
        # and so on.
        start_index[0] += chunk_shape[0]
        for i in range(ndim - 1):
            if start_index[i] >= shape[i]:
                start_index[i] = 0
                start_index[i + 1] += chunk_shape[i + 1]

        # We can now check whether the iteration is finished
        if start_index[-1] >= shape[-1]:
            break
def iterate_chunks(shape, chunk_shape=None, n_max=None):
    """
    Given a data shape and a chunk shape (or maximum chunk size), iteratively
    return slice objects that can be used to slice the array.

    Because this is a generator, invalid arguments are only reported once
    iteration starts.

    Parameters
    ----------
    shape : sequence of int
        Shape of the data to be covered by the chunks.
    chunk_shape : sequence of int, optional
        Shape of each chunk: same length as ``shape``, no larger than
        ``shape`` in any dimension, and strictly positive.
    n_max : optional
        Forwarded to ``find_chunk_shape`` to choose ``chunk_shape`` when it is
        not supplied. Give either ``chunk_shape`` or ``n_max``, not both.

    Yields
    ------
    tuple of slice
        One slice per dimension for each chunk. The first dimension varies
        fastest, and the last chunk along a dimension is shortened so that it
        ends at the edge of ``shape``.

    Raises
    ------
    ValueError
        If neither or both of ``chunk_shape`` and ``n_max`` are given, or if
        ``chunk_shape`` has the wrong length, contains a non-positive value,
        or does not fit within ``shape``.
    """

    # Shortcut - if there are any 0 elements in the shape, there are no
    # chunks to iterate over. Checking each element avoids multiplying the
    # dimensions together, which can overflow to 0 for very large shapes.
    if any(size == 0 for size in shape):
        return

    if chunk_shape is None and n_max is None:
        raise ValueError('Either chunk_shape or n_max should be specified')
    if chunk_shape is not None and n_max is not None:
        raise ValueError('Either chunk_shape or n_max should be specified (not both)')

    if chunk_shape is None:
        chunk_shape = find_chunk_shape(shape, n_max)
    elif len(chunk_shape) != len(shape):
        raise ValueError('chunk_shape should have the same length as shape')
    elif any(step < 1 for step in chunk_shape):
        # A zero step would describe infinitely many empty chunks
        raise ValueError('chunk_shape should only contain positive values')
    elif any(step > size for (step, size) in zip(chunk_shape, shape)):
        raise ValueError('chunk_shape should fit within shape')

    # Number of chunks along each dimension (ceiling division)
    counts = [-(-size // step) for (size, step) in zip(shape, chunk_shape)]

    # Total number of chunks; the empty product is 1, so a zero-dimensional
    # shape produces a single empty tuple of slices
    total = 1
    for count in counts:
        total *= count

    for flat_index in range(total):

        # Decode the flat chunk number into one chunk position per
        # dimension. Peeling off the first dimension first makes it the
        # fastest-varying one.
        remainder = flat_index
        slices = []
        for (size, step, count) in zip(shape, chunk_shape, counts):
            remainder, position = divmod(remainder, count)
            start = position * step
            slices.append(slice(start, min(start + step, size)))

        yield tuple(slices)
def combine_slices(slice1, slice2, length):
    """
    Express ``slice2`` relative to an array that was already cut by ``slice1``.

    Given two slices that can be applied to a 1D array and the length of that
    array, this returns a new slice which is the one that should be applied to
    the array instead of slice2 if slice1 has already been applied.

    Parameters
    ----------
    slice1, slice2 : slice
        Slices valid for a 1D array of ``length`` elements. Negative steps
        are not supported.
    length : int
        Length of the original 1D array.

    Returns
    -------
    slice
        Slice to apply to the result of ``array[slice1]``. It is
        ``slice(0, 0, 1)`` when the two slices share no element.

    Raises
    ------
    ValueError
        If either slice has a negative step.
    """
    start_a, stop_a, stride_a = slice1.indices(length)
    start_b, stop_b, stride_b = slice2.indices(length)

    if min(stride_a, stride_b) < 0:
        raise ValueError("combine_slices does not support slices with negative step")

    # The two ranges do not touch at all
    if start_b >= stop_a or stop_b <= start_a:
        return slice(0, 0, 1)

    lower = max(start_a, start_b)
    upper = min(stop_a, stop_b)

    # Push the lower bound up to the next element that slice2 selects
    misalignment = (lower - start_b) % stride_b
    if misalignment != 0:
        lower += stride_b - misalignment

    # Walk over the elements slice2 selects inside the overlap and keep the
    # positions (in the slice1-applied array) of the first two that slice1
    # selects as well: the first gives the start, their distance the step
    hits = []
    for position in range(lower, upper, stride_b):
        quotient, leftover = divmod(position - start_a, stride_a)
        if leftover == 0:
            hits.append(quotient)
            if len(hits) == 2:
                break

    if not hits:
        return slice(0, 0, 1)

    if len(hits) == 1:
        return slice(hits[0], hits[0] + 1, 1)

    # Upper bound expressed in the slice1-applied array, rounded up
    stop_new, leftover = divmod(upper - start_a, stride_a)
    if leftover != 0:
        stop_new += 1

    return slice(hits[0], stop_new, hits[1] - hits[0])
def combine_slices(slice1, slice2, length):
    """
    Translate ``slice2`` into the index space left after applying ``slice1``.

    Given two slices that can be applied to a 1D array and the length of that
    array, this returns a new slice which is the one that should be applied to
    the array instead of slice2 if slice1 has already been applied.

    Parameters
    ----------
    slice1, slice2 : slice
        Slices for a 1D array with ``length`` elements; both must have a
        non-negative step.
    length : int
        Number of elements in the original array.

    Returns
    -------
    slice
        The slice to use on ``array[slice1]``; ``slice(0, 0, 1)`` if nothing
        is selected by both slices.

    Raises
    ------
    ValueError
        If the step of ``slice1`` or ``slice2`` is negative.
    """
    first_beg, first_end, first_step = slice1.indices(length)
    second_beg, second_end, second_step = slice2.indices(length)

    if first_step < 0 or second_step < 0:
        raise ValueError("combine_slices does not support slices with negative step")

    nothing = slice(0, 0, 1)

    if second_beg >= first_end or second_end <= first_beg:
        return nothing

    overlap_beg = max(first_beg, second_beg)
    overlap_end = min(first_end, second_end)

    # Round the start of the overlap up onto the grid of the second slice
    off_grid = (overlap_beg - second_beg) % second_step
    if off_grid:
        overlap_beg += second_step - off_grid

    # Lazily produce, for every element of the second slice within the
    # overlap that the first slice also selects, its position in the array
    # obtained after applying the first slice. Only the first two are needed.
    shared = (
        (idx - first_beg) // first_step
        for idx in range(overlap_beg, overlap_end, second_step)
        if (idx - first_beg) % first_step == 0
    )

    head = next(shared, None)
    if head is None:
        return nothing

    following = next(shared, None)
    if following is None:
        return slice(head, head + 1, 1)

    # Ceiling division: where the overlap ends in the first-slice index space
    span = overlap_end - first_beg
    end_new = span // first_step + (1 if span % first_step else 0)

    return slice(head, end_new, following - head)
def format_minimal(values):
    """
    Find the shortest format that can be used to represent all values in an
    array such that all the string representations are different.

    The current implementation is not incredibly efficient, but it takes only
    ~30ms for a 1000 element array and 200ms for a 10000 element array. One
    could probably make a more efficient implementation but this is good
    enough for now for what we use it for.

    Parameters
    ----------
    values : array-like
        The values to format. Empty input, a single value and scalars are
        accepted.

    Returns
    -------
    fmt : str
        Format string of the form ``'{:.Nf}'`` or ``'{:.Ne}'`` with ``N``
        between 1 and 14. If no precision in that range gives distinct
        strings, the 14-decimal format is returned.
    strings : list of str
        The values formatted with ``fmt``.
    """

    values = np.atleast_1d(np.asarray(values))

    # Use exponent notation for large magnitudes or small spacing. Each
    # reduction is only evaluated when it has at least one element to work
    # on: the maximum needs one value and the differences need two.
    # NOTE: the differences are signed, so input that is not strictly
    # increasing always ends up with the exponent notation.
    if values.size > 0 and np.max(np.abs(values)) > 1e5:
        fmt_type = 'e'
    elif values.size > 1 and np.min(np.diff(values)) < 1e-5:
        fmt_type = 'e'
    else:
        fmt_type = 'f'

    # Increase the number of decimals until all the strings are distinct
    for ndec in range(1, 15):
        fmt = '{{:.{0}{1}}}'.format(ndec, fmt_type)
        strings = [fmt.format(x) for x in values]
        if len(strings) == len(set(strings)):
            break

    return fmt, strings
def format_minimal(values):
    """
    Find the shortest format that can be used to represent all values in an
    array such that all the string representations are different.

    The current implementation is not incredibly efficient, but it takes only
    ~30ms for a 1000 element array and 200ms for a 10000 element array. One
    could probably make a more efficient implementation but this is good
    enough for now for what we use it for.

    Parameters
    ----------
    values : array-like
        Values to be formatted; may be empty, hold a single value, or be a
        scalar.

    Returns
    -------
    fmt : str
        The chosen format string, ``'{:.Nf}'`` or ``'{:.Ne}'``, where ``N``
        is the smallest number of decimals from 1 to 14 that makes all the
        strings different (14 if none does).
    strings : list of str
        The formatted values, as a list in the same order as ``values``.
    """

    values = np.atleast_1d(np.asarray(values))

    # The largest magnitude is undefined for empty input and the smallest
    # spacing is undefined for fewer than two values, so those criteria are
    # simply treated as not met in these cases.
    too_large = bool(values.size) and np.max(np.abs(values)) > 1e5

    # The spacing is taken as the signed difference between neighbours, which
    # means that equal or decreasing neighbours also count as "too close".
    too_close = (not too_large
                 and values.size >= 2
                 and np.min(np.diff(values)) < 1e-5)

    fmt_type = 'e' if (too_large or too_close) else 'f'

    # Try formats with more and more decimals and stop at the first one for
    # which no two values share the same string
    for ndec in range(1, 15):
        fmt = '{{:.{0}{1}}}'.format(ndec, fmt_type)
        strings = [fmt.format(x) for x in values]
        if len(set(strings)) == len(strings):
            break

    return fmt, strings