コード例 #1
0
def _get_axes(*arrays):
    """Find the list of common axes from axis-aligned DimArray objects.

    For every dimension returned by ``get_dims(*arrays)``, one representative
    axis is selected among the arrays that have this dimension: the first
    axis encountered, replaced by the first non-singleton axis if the
    current choice has size 1.

    Parameters
    ----------
    *arrays : objects exposing ``dims`` (membership test) and ``axes``
        (indexable by dimension name); each axis exposes ``size`` and
        ``values``.

    Returns
    -------
    Axes
        One axis per dimension, in the order given by ``get_dims``.

    Raises
    ------
    ValueError
        If two non-singleton axes of the same dimension do not hold the
        same values (including when they have different lengths).
    """
    dims = get_dims(*arrays)  # all dimensions present in objects
    axes = Axes()

    for dim in dims:

        common_axis = None

        for o in arrays:

            # skip missing dimensions
            if dim not in o.dims:
                continue

            axis = o.axes[dim]

            # first axis seen for this dimension, or upgrade a singleton
            # axis to the first "real" (size > 1) one
            if common_axis is None or (common_axis.size == 1 and axis.size > 1):
                common_axis = axis

            # Test alignment for non-singleton axes.
            # np.array_equal returns False on a shape mismatch, whereas
            # `==` on arrays of different lengths is an error in recent
            # numpy versions; this keeps the error message consistent.
            if axis.size != 1 and not np.array_equal(axis.values, common_axis.values):
                raise ValueError("axes are not aligned")

        # append new axis
        axes.append(common_axis)

    return axes
コード例 #2
0
ファイル: indexing.py プロジェクト: koenvo/dimarray
def _take_broadcast(a, indices):
    """Index `a` with array-indices & integers, following numpy's classical
    (broadcasting) indexing rules.

    The values are taken directly as ``a.values[indices]``; the remainder of
    the function rebuilds axes that match the shape of that result. When
    several dimensions are indexed by arrays (integers count as arrays once
    broadcast), they collapse into one single "broadcast" axis whose values
    are tuples and whose name is the comma-joined names of the indexed axes.

    Parameters
    ----------
    a : array object exposing ``values``, ``axes``, ``_constructor`` and
        ``_metadata``
    indices : tuple of indices usable as ``a.values[indices]``; element ``i``
        is applied to ``a.axes[i]``

    Returns
    -------
    A scalar if ``a.values[indices]`` is a scalar, otherwise the result of
    ``a._constructor(newval, newaxes, **a._metadata)``.

    Examples:
    ---------
    >>> a = da.zeros(shape=(3,4,5,6))
    >>> a[:,[0, 1],:,2].shape
    (2, 3, 5)
    >>> a[:,[0, 1],2,:].shape
    (3, 2, 6)
    """
    # new values
    newval = a.values[indices]

    # if the new values is a scalar, then just return it
    if np.isscalar(newval):
        return newval

    # new axes: broadcast indices (should index the same values as above,
    # since integers are just broadcast)
    indices2 = broadcast_indices(indices)

    # which of the broadcast indices are array-like?
    is_array2 = np.array([np.iterable(ix) for ix in indices2])
    nb_array2 = is_array2.sum()

    # If none or one array is present, easy: index every axis on its own
    # and drop the axes indexed by a scalar
    if nb_array2 <= 1:
        newaxes = [
            a.axes[i][ix] for i, ix in enumerate(indices) if not np.isscalar(ix)
        ]  # indices or indices2, does not matter

    # else, finer check needed
    else:
        # same stats but on original indices
        is_array = np.array([np.iterable(ix) for ix in indices])
        array_ix_pos = np.where(is_array)[0]

        # Determine where the axis will be inserted
        # - need to consider the integers as well (broadcast as arrays)
        # - if two indexed dimensions are not contiguous, new axis placed at first position...
        # a = zeros((3,4,5,6))
        # a[:,[1,2],:,0].shape ==> (2, 3, 5)
        # a[:,[1,2],0,:].shape ==> (3, 2, 6)
        array_ix_pos2 = np.where(is_array2)[0]
        if np.any(np.diff(array_ix_pos2) > 1):  # i.e. indexed dimensions are not contiguous
            insert = 0
        else:
            insert = array_ix_pos2[0]

        # Now determine axis value
        # ...if originally only one array was provided, use these values correspondingly
        if len(array_ix_pos) == 1:
            i = array_ix_pos[0]
            values = a.axes[i].values[indices[i]]
            name = a.axes[i].name

        # ...else use a list of tuples
        else:
            # list(...) is required on python 3, where zip returns a
            # one-shot iterator instead of a list (no-op on python 2)
            values = list(zip(*[a.axes[i].values[indices2[i]] for i in array_ix_pos]))
            name = ",".join([a.axes[i].name for i in array_ix_pos])

        broadcastaxis = Axis(values, name)

        newaxes = Axes()
        for i, ax in enumerate(a.axes):

            # axis is already part of the broadcast axis: skip
            if is_array2[i]:
                continue

            newaxes.append(ax[indices2[i]])

        # insert the right new axis at the appropriate position
        newaxes.insert(insert, broadcastaxis)

    return a._constructor(newval, newaxes, **a._metadata)
コード例 #3
0
def _take_broadcast(a, indices):
    """Take values from `a` the way numpy does when array-indices and
    integers are mixed (classical broadcast indexing), and rebuild the axes.

    Values come from ``a.values[indices]``. Dimensions indexed by arrays
    (integers included, once broadcast) are merged into a single axis made
    of tuples, named after the comma-joined names of the indexed axes; the
    other dimensions keep their own (sliced) axis.

    Parameters
    ----------
    a : array object exposing ``values``, ``axes``, ``_constructor`` and
        ``_metadata``
    indices : tuple of indices usable as ``a.values[indices]``; element ``i``
        is applied to ``a.axes[i]``

    Returns
    -------
    The scalar ``a.values[indices]`` when it is a scalar, otherwise
    ``a._constructor(values, axes, **a._metadata)``.

    Examples:
    ---------
    >>> a = da.zeros(shape=(3,4,5,6))
    >>> a[:,[0, 1],:,2].shape
    (2, 3, 5)
    >>> a[:,[0, 1],2,:].shape
    (3, 2, 6)
    """
    picked = a.values[indices]

    # a scalar result carries no axes: return it as is
    if np.isscalar(picked):
        return picked

    # indices after broadcasting (integers become arrays alongside arrays)
    expanded = broadcast_indices(indices)
    expanded_is_array = np.array([np.iterable(ix) for ix in expanded])

    # At most one array index: every axis can be indexed independently,
    # axes indexed by a scalar simply disappear
    if expanded_is_array.sum() <= 1:
        newaxes = [
            a.axes[i][ix] for i, ix in enumerate(indices)
            if not np.isscalar(ix)
        ]
        return a._constructor(picked, newaxes, **a._metadata)

    # positions of the array indices, before and after broadcasting
    original_pos = np.where(np.array([np.iterable(ix) for ix in indices]))[0]
    expanded_pos = np.where(expanded_is_array)[0]

    # Position of the merged axis, as numpy does it:
    # a = zeros((3,4,5,6))
    # a[:,[1,2],:,0].shape ==> (2, 3, 5)   (not contiguous: goes first)
    # a[:,[1,2],0,:].shape ==> (3, 2, 6)   (contiguous: stays in place)
    contiguous = not np.any(np.diff(expanded_pos) > 1)
    insert = expanded_pos[0] if contiguous else 0

    if len(original_pos) == 1:
        # only one array was originally provided: keep its axis values
        k = original_pos[0]
        values = a.axes[k].values[indices[k]]
        name = a.axes[k].name
    else:
        # several arrays: axis values are tuples. list(...) materialises the
        # python 3 zip iterator (harmless copy on python 2)
        values = list(zip(
            *[a.axes[k].values[expanded[k]] for k in original_pos]))
        name = ",".join([a.axes[k].name for k in original_pos])

    broadcastaxis = Axis(values, name)

    # keep the axes which are not part of the merged axis...
    newaxes = Axes()
    for k, ax in enumerate(a.axes):
        if not expanded_is_array[k]:
            newaxes.append(ax[expanded[k]])

    # ...and insert the merged axis at the appropriate position
    newaxes.insert(insert, broadcastaxis)

    return a._constructor(picked, newaxes, **a._metadata)
コード例 #4
0
def aggregate(arrays, check_overlap=True):
    """ Combine several arrays into one, like a multi-dimensional concatenate

    The result is defined on the union of the axis values of all input
    arrays (unique values, per dimension), and each input array is written
    at the location given by its own axes.

    input:
        arrays: sequence of DimArrays

        check_overlap, optional: if True (the default), read back the target
            region before each write and compare it with the incoming values:
            - where only one of the two is not NaN, that value is kept
            - if both are valid (not NaN) somewhere, a ValueError is raised
            This is safer but slower, since a copy of the sub-array is
            extracted and tested for NaNs for every input array. With
            check_overlap=False values are written unconditionally, so the
            latter array wins (NaNs included).

    output:
        a new array, built with the constructor and dtype of arrays[0]

    Note:
        Probably a bad idea to have duplicate axis values (not tested)

    TODO: add support for missing values other than np.nan

    Examples:
    ---------
    >>> a = DimArray([[1.,2,3]],axes=[('line',[0]), ('col',['a','b','c'])])
    >>> b = DimArray([[4],[5]], axes=[('line',[1,2]), ('col',['d'])])
    >>> c = DimArray([[22]], axes=[('line',[2]), ('col',['b'])])
    >>> d = DimArray([-99], axes=[('line',[4])])
    >>> aggregate((a,b,c,d))
    dimarray: 10 non-null elements (6 null)
    dimensions: 'line', 'col'
    0 / line (4): 0 to 4
    1 / col (4): a to d
    array([[  1.,   2.,   3.,  nan],
           [ nan,  nan,  nan,   4.],
           [ nan,  22.,  nan,   5.],
           [-99., -99., -99., -99.]])

    But beware of overlapping arrays. The following will raise an error:
    >>> a = DimArray([[1.,2,3]],axes=[('line',[0]), ('col',['a','b','c'])])
    >>> b = DimArray([[4],[5]], axes=[('line',[0,1]), ('col',['b'])])
    >>> try:
    ...            aggregate((a,b))
    ... except ValueError as msg:
    ...            print(msg)
    Overlapping arrays: set check_overlap to False to suppress this error.

    Can set check_overlap to False to let it happen anyway (the latter array wins)
    >>> aggregate((a,b), check_overlap=False)
    dimarray: 4 non-null elements (2 null)
    dimensions: 'line', 'col'
    0 / line (2): 0 to 1
    1 / col (3): a to c
    array([[  1.,   4.,   3.],
           [ nan,   5.,  nan]])

    Note that if NaNs are present on overlapping, the valid data are kept
    >>> a = DimArray([[1.,2,3]],axes=[('line',[1]), ('col',['a','b','c'])])
    >>> b = DimArray([[np.nan],[5]], axes=[('line',[1,2]), ('col',['b'])])
    >>> aggregate((a,b)) # does not overwrite `2` at location (1, 'b')
    dimarray: 4 non-null elements (2 null)
    dimensions: 'line', 'col'
    0 / line (2): 1 to 2
    1 / col (3): a to c
    array([[  1.,   2.,   3.],
           [ nan,   5.,  nan]])
    """
    # One merged axis per dimension: concatenate the axes of every array
    # that has this dimension, then keep the unique values only
    merged_axes = Axes()
    for dim in get_dims(*arrays):
        pieces = [arr.axes[dim] for arr in arrays if dim in arr.dims]
        merged = concatenate_axes(pieces)
        merged.values = np.unique(merged.values)
        merged_axes.append(merged)

    # Empty container; the first array decides the constructor and dtype
    first = arrays[0]
    result = first._constructor(None, axes=merged_axes, dtype=first.dtype)

    for arr in arrays:

        # location of this array in the result, as {axis name: axis values}
        where = dict((ax.name, ax.values) for ax in arr.axes)
        incoming = arr.values

        if check_overlap:

            # what is currently stored at the target location
            current = result.take(where, broadcast_arrays=False).values
            current_missing = np.isnan(current)
            incoming_missing = np.isnan(incoming)

            # conflict: a valid value would replace another valid value
            if np.any(~(current_missing | incoming_missing)):
                raise ValueError("Overlapping arrays: set check_overlap to False to suppress this error.")

            # do not let incoming NaNs erase what is already there
            incoming = np.where(incoming_missing, current, incoming)

        # The actual write is delegated to put
        result.put(incoming, indices=where, inplace=True, convert=True, broadcast_arrays=False)

    return result