コード例 #1
0
def reduce_axis(data,
                reducer,
                block_reducer,
                mapper=None,
                axis=None,
                blen=None,
                storage=None,
                create='array',
                **kwargs):
    """Reduce `data` over one or more axes, one block of rows at a time.

    Each slice ``data[i:j]`` is passed through `mapper` (when one is given)
    and then through ``reducer(block, axis=axis)``. When `axis` is None or
    contains 0, the per-block results are folded together with
    `block_reducer`; otherwise they are appended, in order, to an output
    created through the storage object.

    Parameters
    ----------
    data : sliceable object supporting ``len()``
    reducer : callable invoked as ``reducer(block, axis=axis)``
    block_reducer : callable combining two per-block results
    mapper : optional callable applied to each block before reducing
    axis : None, int or collection of ints; an int is wrapped in a tuple
    blen : block length, resolved by ``_util.get_blen_array``
    storage : storage specifier, resolved by ``_util.get_storage``
    create : name of the storage method used to build array outputs
    **kwargs : forwarded to the storage method (``out`` is removed first)

    Returns
    -------
    A scalar, or an object built by the storage method. When the first
    axis is preserved and `data` is empty, None is returned.

    Raises
    ------
    ValueError
        If an ``out`` keyword other than None is supplied.
    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)

    # an integer axis becomes a 1-tuple so membership can be tested below
    if isinstance(axis, int):
        axis = (axis, )

    # an 'out' keyword can arrive when a chunked array is handed to
    # numpy.sum(); only None is tolerated, see also
    # https://github.com/cggh/scikit-allel/issues/66
    if kwargs.pop('out', None) is not None:
        raise ValueError('keyword argument "out" is not supported')

    # True: blocks must be combined (two-step reduction)
    # False: the first dimension survives, block results are concatenated
    combine_blocks = axis is None or 0 in axis

    result = None
    for lo in range(0, length, blen):
        hi = min(lo + blen, length)
        chunk = data[lo:hi]
        if mapper:
            chunk = mapper(chunk)
        partial = reducer(chunk, axis=axis)
        if result is None:
            if combine_blocks:
                result = partial
            else:
                result = getattr(storage, create)(partial,
                                                  expectedlen=length,
                                                  **kwargs)
        elif combine_blocks:
            result = block_reducer(result, partial)
        else:
            result.append(partial)

    if not combine_blocks:
        return result

    # unwrap scalar-like results, wrap everything else via the storage
    if np.isscalar(result):
        return result
    if len(result.shape) == 0:
        return result[()]
    return getattr(storage, create)(result, **kwargs)
コード例 #2
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def copy(data, start=0, stop=None, blen=None, storage=None, create='array',
         **kwargs):
    """Copy rows ``start:stop`` of `data` block-wise into a new array.

    `stop` defaults to ``len(data)`` and is clipped to it. Each block is
    converted with ``np.asanyarray``; the first block creates the output via
    ``getattr(storage, create)`` and the remaining blocks are appended.

    Returns the created output, or None when the range contains no rows.
    Raises ValueError if the (clipped) `stop` lies before `start`.
    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)

    # clip the end of the range to the available rows
    n_rows = len(data)
    stop = n_rows if stop is None else min(stop, n_rows)
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    out = None
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        chunk = np.asanyarray(data[lo:hi])
        if out is not None:
            out.append(chunk)
            continue
        # first block: create the output, hinting the total row count
        out = getattr(storage, create)(chunk, expectedlen=length, **kwargs)

    return out
コード例 #3
0
def copy(data,
         start=0,
         stop=None,
         blen=None,
         storage=None,
         create='array',
         **kwargs):
    """Copy rows ``start:stop`` of `data` block-wise into a new array.

    `stop` defaults to ``len(data)`` and is never allowed to exceed it.
    Blocks are taken as plain slices of `data`; the first one creates the
    output through ``getattr(storage, create)`` and later ones are appended.

    Returns the created output, or None when there are no rows to copy.
    Raises ValueError if the (clipped) `stop` lies before `start`.
    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)

    available = len(data)
    if stop is not None:
        stop = min(stop, available)
    else:
        stop = available
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    out = None
    for begin in range(start, stop, blen):
        end = min(begin + blen, stop)
        piece = data[begin:end]
        if out is None:
            # the first block creates the output container
            out = getattr(storage, create)(piece, expectedlen=length, **kwargs)
            continue
        out.append(piece)

    return out
コード例 #4
0
def compress(condition,
             data,
             axis=0,
             out=None,
             blen=None,
             storage=None,
             create='array',
             **kwargs):
    """Return selected slices of an array along given axis.

    Parameters
    ----------
    condition : boolean selection; sliced block-wise for ``axis=0`` and
        converted to an array once for ``axis=1``
    data : sliceable object supporting ``len()``
    axis : 0 or 1
    out : must be None; accepted only for numpy API compatibility
    blen : block length, resolved by ``_util.get_blen_array``
    storage : storage specifier, resolved by ``_util.get_storage``
    create : name of the storage method used to build the output
    **kwargs : forwarded to the storage method

    Returns
    -------
    The output built by the storage method, or None when no block
    contributed any rows (e.g. nothing is selected along axis 0).

    Raises
    ------
    NotImplementedError
        If `out` is given or `axis` is neither 0 nor 1.
    """

    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    if axis != 0 and axis != 1:
        # reject unsupported axes before touching the inputs; the 1-tuple
        # keeps a tuple-valued axis from being unpacked as format arguments
        raise NotImplementedError('axis not supported: %s' % (axis, ))

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)

    if axis == 0:
        _util.check_equal_length(data, condition)
        # number of selected rows, only needed as a size hint on this axis
        nnz = count_nonzero(condition)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i + blen, length)
            bcond = np.asarray(condition[i:j])
            # don't access any data unless we have to
            if not np.any(bcond):
                continue
            block = np.asarray(data[i:j])
            res = np.compress(bcond, block, axis=0)
            if out is None:
                out = getattr(storage, create)(res,
                                               expectedlen=nnz,
                                               **kwargs)
            else:
                out.append(res)
        return out

    # axis == 1: every row is kept, columns are filtered within each block
    out = None
    condition = np.asanyarray(condition)
    for i in range(0, length, blen):
        j = min(i + blen, length)
        block = np.asarray(data[i:j])
        res = np.compress(condition, block, axis=1)
        if out is None:
            out = getattr(storage, create)(res,
                                           expectedlen=length,
                                           **kwargs)
        else:
            out.append(res)

    return out
コード例 #5
0
def take(data,
         indices,
         axis=0,
         out=None,
         mode='raise',
         blen=None,
         storage=None,
         create='array',
         **kwargs):
    """Take elements from an array along an axis.

    For ``axis=0`` the indices are turned into a boolean condition and the
    work is delegated to ``compress()``; this requires strictly increasing
    indices, and `mode` is not consulted on this path. For ``axis=1``
    ``np.take`` is applied to each block of rows with the given `mode`.

    Parameters
    ----------
    data : sliceable object supporting ``len()``
    indices : indices to take along `axis`
    axis : 0 or 1
    out : must be None; accepted only for numpy API compatibility
    mode : forwarded to ``np.take`` when ``axis=1``
    blen, storage, create, **kwargs : block length, storage specifier,
        storage method name and extra arguments used to build the output

    Raises
    ------
    NotImplementedError
        If `out` is given, if `axis` is neither 0 nor 1, or if ``axis=0``
        and `indices` are not strictly increasing.
    """

    # setup
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    length = len(data)

    if axis == 0:

        # check that indices are strictly increasing
        indices = np.asanyarray(indices)
        if np.any(indices[1:] <= indices[:-1]):
            raise NotImplementedError('indices must be strictly increasing')

        # implement via compress()
        condition = np.zeros((length, ), dtype=bool)
        condition[indices] = True
        return compress(condition,
                        data,
                        axis=0,
                        blen=blen,
                        storage=storage,
                        create=create,
                        **kwargs)

    if axis == 1:

        # setup
        storage = _util.get_storage(storage)
        blen = _util.get_blen_array(data, blen)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i + blen, length)
            block = data[i:j]
            res = np.take(block, indices, axis=1, mode=mode)
            if out is None:
                out = getattr(storage, create)(res,
                                               expectedlen=length,
                                               **kwargs)
            else:
                out.append(res)
        return out

    # the 1-tuple keeps a tuple-valued axis from being unpacked as format
    # arguments, which would raise TypeError instead of the intended error
    raise NotImplementedError('axis not supported: %s' % (axis, ))
コード例 #6
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Reduce `data` block-wise over one or more axes.

    Blocks of rows are converted with ``np.asanyarray``, optionally passed
    through `mapper`, and reduced with ``reducer(block, axis=axis)``. If
    `axis` is None or contains 0 the block results are merged with
    `block_reducer`; otherwise the first dimension is preserved and block
    results are appended to an output created via the storage object.

    Returns a scalar or an object built by ``getattr(storage, create)``;
    None when the first dimension is preserved and `data` is empty.
    Raises ValueError if an ``out`` keyword other than None is supplied.
    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)

    # a bare integer axis is wrapped so that `in` can be used below
    if isinstance(axis, int):
        axis = (axis,)

    # 'out' may be supplied when a chunked array is passed to numpy.sum(),
    # see also https://github.com/cggh/scikit-allel/issues/66
    if kwargs.pop('out', None) is not None:
        raise ValueError('keyword argument "out" is not supported')

    def reduced_blocks():
        # lazily yield the reduction of each successive block of rows
        for lo in range(0, length, blen):
            chunk = np.asanyarray(data[lo:min(lo + blen, length)])
            if mapper:
                chunk = mapper(chunk)
            yield reducer(chunk, axis=axis)

    if axis is not None and 0 not in axis:
        # first dimension is preserved, block results are simply stacked
        out = None
        for part in reduced_blocks():
            if out is None:
                out = getattr(storage, create)(part, expectedlen=length,
                                               **kwargs)
            else:
                out.append(part)
        return out

    # two-step reduction: reduce each block, then combine the results
    acc = None
    for part in reduced_blocks():
        acc = part if acc is None else block_reducer(acc, part)

    if np.isscalar(acc):
        return acc
    if len(acc.shape) == 0:
        return acc[()]
    return getattr(storage, create)(acc, **kwargs)
コード例 #7
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def subset(data, sel0=None, sel1=None, blen=None, storage=None, create='array',
           **kwargs):
    """Return selected rows (`sel0`) and columns (`sel1`) of an array.

    A non-boolean `sel0` is interpreted as row indices and converted to a
    boolean condition over all rows; a boolean `sel1` is converted to column
    indices. With both selections absent the call is delegated to
    ``copy()``, with only `sel0` to ``compress()``, and with only `sel1` to
    ``take()``. Otherwise the output is built block-wise with
    ``_ndarray_subset``, skipping blocks in which no row is selected.

    Returns the created output, or None when both selections are given and
    no row is selected.
    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    sel0 = None if sel0 is None else np.asanyarray(sel0)
    sel1 = None if sel1 is None else np.asanyarray(sel1)

    # rows are always handled as a boolean mask
    if sel0 is not None and sel0.dtype.kind != 'b':
        mask = np.zeros(length, dtype=bool)
        mask[sel0] = True
        sel0 = mask

    # columns are always handled as indices
    if sel1 is not None and sel1.dtype.kind == 'b':
        sel1 = np.nonzero(sel1)[0]

    # delegate when at most one dimension is being selected
    if sel1 is None:
        if sel0 is None:
            return copy(data, blen=blen, storage=storage, create=create,
                        **kwargs)
        return compress(data, sel0, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)
    if sel0 is None:
        return take(data, sel1, axis=1, blen=blen, storage=storage,
                    create=create, **kwargs)

    # both dimensions selected: build the output block by block
    n_selected = count_nonzero(sel0)
    out = None
    for lo in range(0, length, blen):
        hi = min(lo + blen, length)
        row_mask = sel0[lo:hi]
        if not np.any(row_mask):
            # nothing selected here, avoid reading the data at all
            continue
        chunk = np.asanyarray(data[lo:hi])
        picked = _ndarray_subset(chunk, row_mask, sel1)
        if out is None:
            out = getattr(storage, create)(picked, expectedlen=n_selected,
                                           **kwargs)
        else:
            out.append(picked)

    return out
コード例 #8
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def apply(data, f, blen=None, storage=None, create='array', **kwargs):
    """Apply function `f` block-wise over `data`.

    `data` is either a single array-like or a tuple of equal-length
    array-likes. For a tuple, the block length is the largest value
    returned by ``_util.get_blen_array`` across its members and `f` receives
    one block per member. Blocks are converted with ``np.asanyarray``.
    Results are collected in an output created via
    ``getattr(storage, create)``.

    Returns the created output, or None when `data` has no rows.
    """

    storage = _util.get_storage(storage)

    multiple = isinstance(data, tuple)
    if multiple:
        blen = max(_util.get_blen_array(d, blen) for d in data)
        _util.check_equal_length(*data)
        length = len(data[0])
    else:
        blen = _util.get_blen_array(data, blen)
        length = len(data)

    # treat the single-array case as a 1-tuple so the loop body is uniform
    sources = data if multiple else (data,)

    out = None
    for lo in range(0, length, blen):
        hi = min(lo + blen, length)
        res = f(*[np.asanyarray(src[lo:hi]) for src in sources])
        if out is not None:
            out.append(res)
        else:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)

    return out
コード例 #9
0
def map_blocks(data, f, blen=None, storage=None, create='array', **kwargs):
    """Apply function `f` block-wise over `data`.

    `data` may be one array-like or a tuple of equal-length array-likes; in
    the tuple case `f` is called with one block per member and the block
    length is the maximum reported by ``_util.get_blen_array``. Blocks are
    plain slices of the inputs. The first result creates the output through
    ``getattr(storage, create)``; later results are appended.

    Returns the created output, or None when there are no rows to process.
    """

    storage = _util.get_storage(storage)

    if isinstance(data, tuple):
        blen = max(_util.get_blen_array(d, blen) for d in data)
        _util.check_equal_length(*data)
        length = len(data[0])
        inputs = data
    else:
        blen = _util.get_blen_array(data, blen)
        length = len(data)
        inputs = (data,)

    out = None
    for begin in range(0, length, blen):
        end = min(begin + blen, length)

        # one slice per input, passed positionally to f
        pieces = [arr[begin:end] for arr in inputs]
        mapped = f(*pieces)

        if out is None:
            out = getattr(storage, create)(mapped, expectedlen=length,
                                           **kwargs)
            continue
        out.append(mapped)

    return out
コード例 #10
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def compress(data, condition, axis=0, blen=None, storage=None,
             create='array', **kwargs):
    """Return selected slices of an array along given axis.

    Parameters
    ----------
    data : sliceable object supporting ``len()``
    condition : boolean selection; sliced block-wise for ``axis=0`` and
        converted to an array once for ``axis=1``
    axis : 0 or 1
    blen : block length, resolved by ``_util.get_blen_array``
    storage : storage specifier, resolved by ``_util.get_storage``
    create : name of the storage method used to build the output
    **kwargs : forwarded to the storage method

    Returns
    -------
    The output built by the storage method, or None when no block
    contributed any rows (e.g. nothing is selected along axis 0).

    Raises
    ------
    NotImplementedError
        If `axis` is neither 0 nor 1.
    """

    if axis != 0 and axis != 1:
        # reject unsupported axes before touching the inputs; the 1-tuple
        # keeps a tuple-valued axis from being unpacked as format arguments
        raise NotImplementedError('axis not supported: %s' % (axis,))

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)

    if axis == 0:
        _util.check_equal_length(data, condition)
        # number of selected rows, only needed as a size hint on this axis
        nnz = count_nonzero(condition)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            bcond = np.asanyarray(condition[i:j])
            # don't access any data unless we have to
            if not np.any(bcond):
                continue
            block = np.asanyarray(data[i:j])
            res = np.compress(bcond, block, axis=0)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=nnz,
                                               **kwargs)
            else:
                out.append(res)
        return out

    # axis == 1: every row is kept, columns are filtered within each block
    out = None
    condition = np.asanyarray(condition)
    for i in range(0, length, blen):
        j = min(i+blen, length)
        block = np.asanyarray(data[i:j])
        res = np.compress(condition, block, axis=1)
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length,
                                           **kwargs)
        else:
            out.append(res)

    return out
コード例 #11
0
ファイル: core.py プロジェクト: nvictus/scikit-allel
def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Apply an operation to `data` that reduces over one or more axes.

    Blocks of rows are converted with ``np.asanyarray``, optionally passed
    through `mapper`, and reduced with ``reducer(block, axis=axis)``. If
    `axis` is None or contains 0 the block results are combined with
    `block_reducer`; otherwise the first dimension is preserved and block
    results are appended to an output created via the storage object.

    Parameters
    ----------
    data : sliceable object supporting ``len()``
    reducer : callable invoked as ``reducer(block, axis=axis)``
    block_reducer : callable combining two per-block results
    mapper : optional callable applied to each block before reducing
    axis : None, int or collection of ints; an int is wrapped in a tuple
    blen, storage, create : block length, storage specifier and storage
        method name used to build array outputs
    **kwargs : forwarded to the storage method (``out`` is removed first)

    Returns
    -------
    A scalar or an object built by ``getattr(storage, create)``; None when
    the first dimension is preserved and `data` is empty.

    Raises
    ------
    ValueError
        If an ``out`` keyword other than None is supplied.
    """

    # deal with 'out' kwarg if supplied, can arise if a chunked array is
    # passed as an argument to numpy.sum(); it must not leak into the
    # keyword arguments forwarded to the storage method below
    kwarg_out = kwargs.pop('out', None)
    if kwarg_out is not None:
        raise ValueError('keyword argument "out" is not supported')

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    # normalise axis arg
    if isinstance(axis, int):
        axis = (axis,)

    if axis is None or 0 in axis:
        # two-step reduction
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = np.asanyarray(data[i:j])
            if mapper:
                block = mapper(block)
            res = reducer(block, axis=axis)
            if out is None:
                out = res
            else:
                out = block_reducer(out, res)
        if np.isscalar(out):
            return out
        elif len(out.shape) == 0:
            return out[()]
        else:
            return getattr(storage, create)(out, **kwargs)

    else:
        # first dimension is preserved, no need to reduce blocks
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = np.asanyarray(data[i:j])
            if mapper:
                block = mapper(block)
            r = reducer(block, axis=axis)
            if out is None:
                out = getattr(storage, create)(r, expectedlen=length, **kwargs)
            else:
                out.append(r)
        return out
コード例 #12
0
def concatenate(tup,
                axis=0,
                blen=None,
                storage=None,
                create='array',
                **kwargs):
    """Concatenate arrays.

    For ``axis=0`` every array in `tup` is copied block-wise, one after the
    other, into a single output. For any other axis the arrays are walked in
    lock-step through ``map_blocks()`` and each group of blocks is joined
    with ``np.concatenate``.

    Parameters
    ----------
    tup : tuple or list of at least two arrays
    axis : axis along which to join
    blen : block length, resolved per array by ``_util.get_blen_array``
        for ``axis=0``
    storage, create, **kwargs : storage specifier, storage method name and
        extra arguments used to build the output

    Raises
    ------
    ValueError
        If `tup` is not a tuple or list, or holds fewer than two arrays.
    """

    # validate the arguments before doing any other work
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays')

    # setup
    storage = _util.get_storage(storage)

    if axis == 0:

        # build output
        expectedlen = sum(len(a) for a in tup)
        out = None
        for a in tup:
            ablen = _util.get_blen_array(a, blen)
            for i in range(0, len(a), ablen):
                j = min(i + ablen, len(a))
                block = a[i:j]
                if out is None:
                    out = getattr(storage, create)(block,
                                                   expectedlen=expectedlen,
                                                   **kwargs)
                else:
                    out.append(block)
        return out

    def f(*blocks):
        return np.concatenate(blocks, axis=axis)

    # map_blocks() only treats a tuple as "several arrays"; a list would be
    # processed as one array, so convert before delegating
    return map_blocks(tuple(tup),
                      f,
                      blen=blen,
                      storage=storage,
                      create=create,
                      **kwargs)
コード例 #13
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def store(data, arr, start=0, stop=None, offset=0, blen=None):
    """Copy rows ``start:stop`` of `data` block-wise into `arr`.

    `stop` defaults to ``len(data)`` and is clipped to it. The first copied
    row is written at position `offset` of `arr`, and each subsequent block
    is written directly after the previous one. Nothing is returned.

    Raises ValueError if the (clipped) `stop` lies before `start`.
    """

    blen = _util.get_blen_array(data, blen)

    n_rows = len(data)
    stop = n_rows if stop is None else min(stop, n_rows)
    if stop - start < 0:
        raise ValueError('invalid stop/start')

    # write position in `arr`, advanced by the size of each block copied
    dest = offset
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        size = hi - lo
        arr[dest:dest + size] = data[lo:hi]
        dest += size
コード例 #14
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def take(data, indices, axis=0, blen=None, storage=None,
         create='array', **kwargs):
    """Take elements from an array along an axis.

    For ``axis=0`` the indices are turned into a boolean condition and the
    work is delegated to ``compress()``; this requires strictly increasing
    indices. For ``axis=1`` ``np.take`` is applied to each block of rows.

    Parameters
    ----------
    data : sliceable object supporting ``len()``
    indices : indices to take along `axis`
    axis : 0 or 1
    blen, storage, create, **kwargs : block length, storage specifier,
        storage method name and extra arguments used to build the output

    Raises
    ------
    NotImplementedError
        If `axis` is neither 0 nor 1, or if ``axis=0`` and `indices` are
        not strictly increasing.
    """

    # setup
    length = len(data)

    if axis == 0:

        # check that indices are strictly increasing
        indices = np.asanyarray(indices)
        if np.any(indices[1:] <= indices[:-1]):
            raise NotImplementedError(
                'indices must be strictly increasing'
            )

        # implement via compress()
        condition = np.zeros((length,), dtype=bool)
        condition[indices] = True
        return compress(data, condition, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)

    if axis == 1:

        # setup
        storage = _util.get_storage(storage)
        blen = _util.get_blen_array(data, blen)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = np.asanyarray(data[i:j])
            res = np.take(block, indices, axis=1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=length,
                                               **kwargs)
            else:
                out.append(res)
        return out

    # the 1-tuple keeps a tuple-valued axis from being unpacked as format
    # arguments, which would raise TypeError instead of the intended error
    raise NotImplementedError('axis not supported: %s' % (axis,))
コード例 #15
0
def store(data, arr, start=0, stop=None, offset=0, blen=None):
    """Copy rows ``start:stop`` of `data` block-wise into `arr`.

    Rows are written to `arr` contiguously, beginning at index `offset`.
    `stop` defaults to ``len(data)`` and never exceeds it. The function
    assigns into `arr` in place and returns nothing.

    Raises ValueError if the (clipped) `stop` lies before `start`.
    """

    blen = _util.get_blen_array(data, blen)

    available = len(data)
    if stop is not None:
        stop = min(stop, available)
    else:
        stop = available
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    for begin in range(start, stop, blen):
        end = min(begin + blen, stop)
        n_copied = end - begin
        target_end = offset + n_copied
        arr[offset:target_end] = data[begin:end]
        # the next block lands right behind this one
        offset = target_end
コード例 #16
0
ファイル: core.py プロジェクト: nvictus/scikit-allel
def subset(data, sel0, sel1, blen=None, storage=None, create='array',
           **kwargs):
    """Return selected rows and columns of an array.

    Parameters
    ----------
    data : sliceable object supporting ``len()``
    sel0 : row selection; a boolean array is used as a condition, any other
        dtype is interpreted as row indices
    sel1 : column selection; a boolean array is converted to indices, any
        other dtype is used as column indices as given
    blen, storage, create, **kwargs : block length, storage specifier,
        storage method name and extra arguments used to build the output

    Returns
    -------
    The output built by the storage method, or None when no row is
    selected.

    Notes
    -----
    Whether a selection is a mask or a list of indices is decided by its
    dtype, not its length. Deciding by length misreads index arrays that
    happen to have as many entries as the dimension (e.g. a selection of
    every column), silently dropping index 0.
    """

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    sel0 = np.asanyarray(sel0)
    sel1 = np.asanyarray(sel1)

    # ensure boolean array for dim 0
    if sel0.dtype.kind != 'b':
        # indices given, convert to boolean condition
        tmp = np.zeros(length, dtype=bool)
        tmp[sel0] = True
        sel0 = tmp

    # ensure indices for dim 1
    if sel1.dtype.kind == 'b':
        # boolean condition given, convert to indices
        sel1 = np.nonzero(sel1)[0]

    # build output
    sel0_nnz = count_nonzero(sel0)
    out = None
    for i in range(0, length, blen):
        j = min(i+blen, length)
        bsel0 = sel0[i:j]
        # don't access data unless we have to
        if np.any(bsel0):
            block = np.asanyarray(data[i:j])
            res = _ndarray_subset(block, bsel0, sel1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=sel0_nnz,
                                               **kwargs)
            else:
                out.append(res)

    return out
コード例 #17
0
ファイル: core.py プロジェクト: hardingnj/scikit-allel
def vstack(tup, blen=None, storage=None, create='array', **kwargs):
    """Stack arrays in sequence vertically (row wise).

    Every array in `tup` is read block-wise (block length resolved per
    array by ``_util.get_blen_array``), each block is converted with
    ``np.asanyarray``, and the blocks are written one after another into a
    single output created via ``getattr(storage, create)``. The combined
    length of all arrays is passed to the storage method as `expectedlen`.

    Returns the created output, or None when every array is empty.
    Raises ValueError if `tup` is not a tuple or list, or holds fewer than
    two arrays.
    """

    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays to stack')

    # total number of rows across all inputs, used as a size hint
    expectedlen = sum(len(a) for a in tup)

    out = None
    for arr in tup:
        step = _util.get_blen_array(arr, blen)
        for lo in range(0, len(arr), step):
            hi = min(lo + step, len(arr))
            chunk = np.asanyarray(arr[lo:hi])
            if out is not None:
                out.append(chunk)
                continue
            out = getattr(storage, create)(chunk, expectedlen=expectedlen,
                                           **kwargs)
    return out
コード例 #18
0
ファイル: core.py プロジェクト: nvictus/scikit-allel
def vstack(tup, blen=None, storage=None, create='array', **kwargs):
    """Stack arrays in sequence vertically (row wise).

    The arrays in `tup` are traversed in order, each in blocks whose length
    is resolved by ``_util.get_blen_array``. The first block creates the
    output through ``getattr(storage, create)`` (with the summed length of
    all arrays as `expectedlen`); every later block is appended to it.

    Returns the created output, or None when every array is empty.
    Raises ValueError if `tup` is not a tuple or list, or holds fewer than
    two arrays.
    """

    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays to stack')

    expectedlen = sum(len(a) for a in tup)

    def iter_blocks():
        # yield successive row blocks of each array, in input order
        for a in tup:
            ablen = _util.get_blen_array(a, blen)
            for begin in range(0, len(a), ablen):
                end = min(begin + ablen, len(a))
                yield np.asanyarray(a[begin:end])

    out = None
    for piece in iter_blocks():
        if out is None:
            out = getattr(storage, create)(piece, expectedlen=expectedlen,
                                           **kwargs)
        else:
            out.append(piece)
    return out
コード例 #19
0
def subset(data,
           sel0=None,
           sel1=None,
           blen=None,
           storage=None,
           create='array',
           **kwargs):
    """Return selected rows (`sel0`) and columns (`sel1`) of an array.

    A non-boolean `sel0` is interpreted as row indices and converted to a
    boolean condition over all rows; a boolean `sel1` is converted to column
    indices. When neither selection is given the call is delegated to
    ``copy()``; with only `sel0` to ``compress()``; with only `sel1` to
    ``take()``. Otherwise the output is assembled block-wise with
    ``_numpy_subset``, skipping blocks in which no row is selected.

    Returns the created output, or None when both selections are given and
    no row is selected.
    """

    # TODO refactor sel0 and sel1 normalization with ndarray.subset

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    sel0 = np.asanyarray(sel0) if sel0 is not None else None
    sel1 = np.asanyarray(sel1) if sel1 is not None else None

    # dim 0 is always handled as a boolean condition
    if sel0 is not None and sel0.dtype.kind != 'b':
        row_mask = np.zeros(length, dtype=bool)
        row_mask[sel0] = True
        sel0 = row_mask

    # dim 1 is always handled as indices
    if sel1 is not None and sel1.dtype.kind == 'b':
        (sel1,) = np.nonzero(sel1)

    # delegate when at most one dimension is being selected
    if sel1 is None:
        if sel0 is None:
            return copy(data,
                        blen=blen,
                        storage=storage,
                        create=create,
                        **kwargs)
        return compress(sel0,
                        data,
                        axis=0,
                        blen=blen,
                        storage=storage,
                        create=create,
                        **kwargs)
    if sel0 is None:
        return take(data,
                    sel1,
                    axis=1,
                    blen=blen,
                    storage=storage,
                    create=create,
                    **kwargs)

    # both dimensions selected: assemble the output block by block
    n_selected = count_nonzero(sel0)
    out = None
    for begin in range(0, length, blen):
        end = min(begin + blen, length)
        rows_here = sel0[begin:end]
        if not np.any(rows_here):
            # no rows wanted from this block, so never touch the data
            continue
        picked = _numpy_subset(data[begin:end], rows_here, sel1)
        if out is None:
            out = getattr(storage, create)(picked,
                                           expectedlen=n_selected,
                                           **kwargs)
        else:
            out.append(picked)

    return out