def _get_axes(*arrays):
    """Collect the common axes of several axis-aligned DimArray objects.

    For every dimension returned by ``get_dims(*arrays)`` one axis is kept:
    the first axis found for that dimension, replaced by a later one only
    when the kept axis is a singleton (size 1) and the later one is not.

    Parameters
    ----------
    *arrays : objects exposing ``dims`` and ``axes`` (axes indexable by
        dimension name)

    Returns
    -------
    Axes
        One axis per dimension, in the order given by ``get_dims``.

    Raises
    ------
    ValueError
        If two non-singleton axes of the same dimension hold different
        values (including a different number of values).
    """
    axes = Axes()

    for dim in get_dims(*arrays):  # all dimensions present in objects
        common_axis = None

        for o in arrays:
            # skip objects that do not have this dimension
            if dim not in o.dims:
                continue

            axis = o.axes[dim]

            # a non-singleton axis takes over from a missing or singleton one
            if common_axis is None or (common_axis.size == 1 and axis.size > 1):
                common_axis = axis

            # Alignment is only enforced for non-singleton axes.
            # np.array_equal returns False on a shape mismatch, whereas
            # `axis.values == common_axis.values` cannot broadcast two
            # different lengths > 1 and errors out on recent NumPy.
            if axis.size != 1 and not np.array_equal(axis.values, common_axis.values):
                raise ValueError("axes are not aligned")

        axes.append(common_axis)

    return axes
def _take_broadcast(a, indices):
    """Index `a` numpy-style, broadcasting array indices and integers.

    The values come straight from ``a.values[indices]``; the rest of the
    function rebuilds axes matching that result. When more than one
    dimension ends up indexed by an array (integers count once broadcast),
    those dimensions collapse into a single "broadcast" axis.

    NOTE(review): another definition of `_take_broadcast` appears later in
    this module and would shadow this one; consider removing one of them.

    Parameters
    ----------
    a : object exposing ``values``, ``axes``, ``_constructor`` and ``_metadata``
    indices : indices as accepted by ``a.values[...]``; presumably a tuple
        with one entry (slice, integer or array-like) per dimension

    Returns
    -------
    The scalar itself when the selection is a scalar, otherwise
    ``a._constructor(newval, newaxes, **a._metadata)``.

    Examples:
    ---------
    >>> a = da.zeros(shape=(3,4,5,6))
    >>> a[:,[0, 1],:,2].shape
    (2, 3, 5)
    >>> a[:,[0, 1],2,:].shape
    (3, 2, 6)
    """
    # new values
    newval = a.values[indices]

    # if the new values is a scalar, then just return it
    if np.isscalar(newval):
        return newval

    # new axes: broadcast indices (should index the same values as above,
    # since integers are just broadcast)
    indices2 = broadcast_indices(indices)

    # which dimensions are indexed by an array once broadcast
    is_array2 = np.array([np.iterable(ix) for ix in indices2])
    nb_array2 = is_array2.sum()

    # If none or one array is present, easy: index each axis independently
    # and drop the dimensions indexed by a scalar
    if nb_array2 <= 1:
        newaxes = [a.axes[i][ix] for i, ix in enumerate(indices)
                   if not np.isscalar(ix)]  # indices or indices2, does not matter

    # else, finer check needed
    else:
        # same stats but on original indices
        is_array = np.array([np.iterable(ix) for ix in indices])
        array_ix_pos = np.where(is_array)[0]

        # Determine where the axis will be inserted
        # - need to consider the integers as well (broadcast as arrays)
        # - if two indexed dimensions are not contiguous, new axis placed
        #   at first position...
        # a = zeros((3,4,5,6))
        # a[:,[1,2],:,0].shape ==> (2, 3, 5)
        # a[:,[1,2],0,:].shape ==> (3, 2, 6)
        array_ix_pos2 = np.where(is_array2)[0]
        if np.any(np.diff(array_ix_pos2) > 1):
            insert = 0
        else:
            insert = array_ix_pos2[0]

        # Now determine axis value
        # ...if originally only one array was provided, use its values
        if len(array_ix_pos) == 1:
            i = array_ix_pos[0]
            values = a.axes[i].values[indices[i]]
            name = a.axes[i].name

        # ...else use a list of tuples
        else:
            # list(...) is required: on Python 3 zip() is a one-shot
            # iterator, not the list of tuples intended here
            values = list(zip(*[a.axes[i].values[indices2[i]]
                                for i in array_ix_pos]))
            name = ",".join([a.axes[i].name for i in array_ix_pos])

        broadcastaxis = Axis(values, name)

        # keep the axes that are not part of the broadcast axis
        newaxes = Axes()
        for i, ax in enumerate(a.axes):
            if not is_array2[i]:
                newaxes.append(ax[indices2[i]])

        # insert the right new axis at the appropriate position
        newaxes.insert(insert, broadcastaxis)

    return a._constructor(newval, newaxes, **a._metadata)
def _take_broadcast(a, indices):
    """Take values from `a` with numpy's classical index broadcasting.

    Values are read with ``a.values[indices]``. Axes are then rebuilt to
    match: dimensions indexed by arrays (including integers, which are
    broadcast as arrays) are merged into one "broadcast" axis whenever
    more than one such dimension is present.

    NOTE(review): an earlier definition with the same name appears in this
    module; being the later one, this definition would be the one bound to
    the name. Consider removing the duplicate.

    Parameters
    ----------
    a : object exposing ``values``, ``axes``, ``_constructor`` and ``_metadata``
    indices : indices as accepted by ``a.values[...]``; presumably a tuple
        with one entry (slice, integer or array-like) per dimension

    Returns
    -------
    The scalar itself when the selection is a scalar, otherwise
    ``a._constructor(newval, newaxes, **a._metadata)``.

    Examples:
    ---------
    >>> a = da.zeros(shape=(3,4,5,6))
    >>> a[:,[0, 1],:,2].shape
    (2, 3, 5)
    >>> a[:,[0, 1],2,:].shape
    (3, 2, 6)
    """
    # new values
    newval = a.values[indices]

    # a scalar selection carries no axes: return it as is
    if np.isscalar(newval):
        return newval

    # broadcast indices (selects the same values, since integers are just
    # broadcast)
    indices2 = broadcast_indices(indices)

    # flag dimensions indexed by an array after broadcasting
    is_array2 = np.array([np.iterable(ix) for ix in indices2])
    nb_array2 = is_array2.sum()

    if nb_array2 <= 1:
        # none or one array: every axis is indexed on its own, scalar-indexed
        # dimensions disappear (indices or indices2, does not matter)
        newaxes = [a.axes[i][ix] for i, ix in enumerate(indices)
                   if not np.isscalar(ix)]

    else:
        # positions of the arrays among the original (non-broadcast) indices
        is_array = np.array([np.iterable(ix) for ix in indices])
        array_ix_pos = np.where(is_array)[0]

        # Where to insert the broadcast axis:
        # - integers must be considered too (broadcast as arrays)
        # - if two indexed dimensions are not contiguous, the new axis is
        #   placed at first position:
        #     a = zeros((3,4,5,6))
        #     a[:,[1,2],:,0].shape ==> (2, 3, 5)
        #     a[:,[1,2],0,:].shape ==> (3, 2, 6)
        array_ix_pos2 = np.where(is_array2)[0]
        if np.any(np.diff(array_ix_pos2) > 1):
            insert = 0
        else:
            insert = array_ix_pos2[0]

        # Values and name of the broadcast axis
        if len(array_ix_pos) == 1:
            # only one array originally provided: reuse that axis' values
            i = array_ix_pos[0]
            values = a.axes[i].values[indices[i]]
            name = a.axes[i].name
        else:
            # several arrays: a list of tuples, one tuple per selected item.
            # zip() must be materialized, it is a lazy iterator on Python 3.
            values = list(zip(*[a.axes[i].values[indices2[i]]
                                for i in array_ix_pos]))
            name = ",".join([a.axes[i].name for i in array_ix_pos])

        broadcastaxis = Axis(values, name)

        newaxes = Axes()
        for i, ax in enumerate(a.axes):
            # axis already part of the broadcast axis: skip
            if is_array2[i]:
                continue
            newaxes.append(ax[indices2[i]])

        # insert the right new axis at the appropriate position
        newaxes.insert(insert, broadcastaxis)

    return a._constructor(newval, newaxes, **a._metadata)
def aggregate(arrays, check_overlap=True):
    """ like a multi-dimensional concatenate

    input:
        arrays: sequence (or any iterable) of DimArrays

        check_overlap, optional: if True, check that arrays do not overlap
            (to avoid data loss). If any two elements overlap, keep the one
            which is not NaN, if applicable, or raise an error if two valid
            values overlap.

            Default is True to reduce the risk of errors, but this makes the
            operation less performant since every time a copy of the subarray
            is extracted and tested for NaNs. Consider setting check_overlap
            to False for large arrays for well-tested problems, if the
            valid-nan selection is not required.

    output:
        new array created with the constructor and dtype of the first input
        array, whose axes hold, for each dimension, the sorted unique values
        (np.unique) of the concatenated input axes

    raises:
        ValueError if check_overlap is True and two non-NaN values overlap

    Note:
        Probably a bad idea to have duplicate axis values (not tested)
        The overlap check relies on np.isnan.

    TODO: add support for missing values other than np.nan

    Examples:
    ---------
    >>> a = DimArray([[1.,2,3]],axes=[('line',[0]), ('col',['a','b','c'])])
    >>> b = DimArray([[4],[5]], axes=[('line',[1,2]), ('col',['d'])])
    >>> c = DimArray([[22]], axes=[('line',[2]), ('col',['b'])])
    >>> d = DimArray([-99], axes=[('line',[4])])
    >>> aggregate((a,b,c,d))
    dimarray: 10 non-null elements (6 null)
    dimensions: 'line', 'col'
    0 / line (4): 0 to 4
    1 / col (4): a to d
    array([[  1.,   2.,   3.,  nan],
           [ nan,  nan,  nan,   4.],
           [ nan,  22.,  nan,   5.],
           [-99., -99., -99., -99.]])

    But beware of overlapping arrays. The following will raise an error:

    >>> a = DimArray([[1.,2,3]],axes=[('line',[0]), ('col',['a','b','c'])])
    >>> b = DimArray([[4],[5]], axes=[('line',[0,1]), ('col',['b'])])
    >>> try:
    ...     aggregate((a,b))
    ... except ValueError as msg:
    ...     print(msg)
    Overlapping arrays: set check_overlap to False to suppress this error.

    Can set check_overlap to False to let it happen anyway (the latter array wins)

    >>> aggregate((a,b), check_overlap=False)
    dimarray: 4 non-null elements (2 null)
    dimensions: 'line', 'col'
    0 / line (2): 0 to 1
    1 / col (3): a to c
    array([[  1.,   4.,   3.],
           [ nan,   5.,  nan]])

    Note that if NaNs are present on overlapping, the valid data are kept

    >>> a = DimArray([[1.,2,3]],axes=[('line',[1]), ('col',['a','b','c'])])
    >>> b = DimArray([[np.nan],[5]], axes=[('line',[1,2]), ('col',['b'])])
    >>> aggregate((a,b)) # does not overwrite `2` at location (1, 'b')
    dimarray: 4 non-null elements (2 null)
    dimensions: 'line', 'col'
    0 / line (2): 1 to 2
    1 / col (3): a to c
    array([[  1.,   2.,   3.],
           [ nan,   5.,  nan]])
    """
    # `arrays` is traversed several times and indexed below: materialize it
    # once so that one-shot iterables (e.g. generators) are supported too
    arrays = list(arrays)

    # list of common dimensions
    dims = get_dims(*arrays)

    # build a common Axes object: for each dimension, concatenate the axes
    # of the arrays that have it, and keep unique values only
    axes = Axes()
    for d in dims:
        newaxis = concatenate_axes([a.axes[d] for a in arrays if d in a.dims])
        newaxis.values = np.unique(newaxis.values)  # unique values
        axes.append(newaxis)

    # Fill in an array
    newarray = arrays[0]._constructor(None, axes=axes, dtype=arrays[0].dtype)
    for a in arrays:
        # location of `a` in the new array, by axis name and axis values
        indices = {ax.name: ax.values for ax in a.axes}

        if check_overlap:
            # look for nans in replaced and replacing arrays
            subarray = newarray.take(indices, broadcast_arrays=False).values
            subarray_is_nan = np.isnan(subarray)
            newvalues_is_nan = np.isnan(a.values)

            # an overlap is a location that is valid on both sides
            overlap_values = ~subarray_is_nan & ~newvalues_is_nan
            if np.any(overlap_values):
                raise ValueError("Overlapping arrays: set check_overlap to False to suppress this error.")

            # only take new non-nan values, keep what is already there otherwise
            newvalues = np.where(newvalues_is_nan, subarray, a.values)

        else:
            newvalues = a.values

        # The actual operation is done by put
        newarray.put(newvalues, indices=indices, inplace=True, convert=True,
                     broadcast_arrays=False)

    # That's it !
    return newarray