Example #1
0
def copy_table(tbl, start=0, stop=None, blen=None, storage=None,
               create='table', **kwargs):
    """Copy rows ``start:stop`` of `tbl` into a new table, one block at a time.

    Parameters
    ----------
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    start : int, optional
        Index of the first row to copy.
    stop : int, optional
        Index one past the last row to copy. Defaults to the length of the
        first column and is clipped to that length.
    blen : int, optional
        Block length, resolved by ``_util.get_blen_table``.
    storage : optional
        Storage specification, resolved by ``_util.get_storage``.
    create : str, optional
        Name of the storage method called to create the output.
    **kwargs
        Forwarded to the storage creation method.

    Returns
    -------
    out
        Whatever the storage creation method returned, or None when the
        row range is empty.

    Raises
    ------
    ValueError
        If the (clipped) `stop` is smaller than `start`.
    """
    col_names, cols = _util.check_table_like(tbl)
    backend = _util.get_storage(storage)
    block_len = _util.get_blen_table(tbl, blen)

    # resolve the row range against the length of the first column
    n_rows = len(cols[0])
    stop = n_rows if stop is None else min(stop, n_rows)
    n_out = stop - start
    if n_out < 0:
        raise ValueError('invalid stop/start')

    out = None
    for lo in range(start, stop, block_len):
        hi = min(lo + block_len, stop)
        block = [np.asanyarray(col[lo:hi]) for col in cols]
        if out is not None:
            out.append(block)
            continue
        # first block: create the output container from it
        factory = getattr(backend, create)
        out = factory(block, names=col_names, expectedlen=n_out, **kwargs)

    return out
Example #2
0
def compress_table(tbl, condition, blen=None, storage=None, create='table',
                   **kwargs):
    """Build a new table from the rows of `tbl` flagged by `condition`.

    The input is scanned in blocks of `blen` rows; column data for a block
    is only read when that block's slice of `condition` has a true value.

    Parameters
    ----------
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    condition : array-like
        Row selector; checked to have the same length as the first column.
    blen : int, optional
        Block length, resolved by ``_util.get_blen_table``.
    storage : optional
        Storage specification, resolved by ``_util.get_storage``.
    create : str, optional
        Name of the storage method called to create the output.
    **kwargs
        Forwarded to the storage creation method.

    Returns
    -------
    out
        Whatever the storage creation method returned, or None when no
        block contained a selected row.
    """
    backend = _util.get_storage(storage)
    col_names, cols = _util.check_table_like(tbl)
    block_len = _util.get_blen_table(tbl, blen)
    first_col = cols[0]
    _util.check_equal_length(first_col, condition)
    n_rows = len(first_col)
    n_selected = count_nonzero(condition)

    out = None
    for lo in range(0, n_rows, block_len):
        hi = min(lo + block_len, n_rows)
        mask = np.asanyarray(condition[lo:hi])
        if not np.any(mask):
            # nothing selected here, so skip reading the column data
            continue
        loaded = [np.asanyarray(col[lo:hi]) for col in cols]
        picked = [np.compress(mask, arr, axis=0) for arr in loaded]
        if out is not None:
            out.append(picked)
            continue
        factory = getattr(backend, create)
        out = factory(picked, names=col_names, expectedlen=n_selected,
                      **kwargs)
    return out
Example #3
0
def concatenate_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Append the rows of several tables, in order, into one new table.

    Parameters
    ----------
    tup : tuple or list
        Two or more table-like objects. Each is validated by
        ``_util.check_table_like`` against the column names of the
        previous one.
    blen : int, optional
        Block length, resolved per input by ``_util.get_blen_table``.
    storage : optional
        Storage specification, resolved by ``_util.get_storage``.
    create : str, optional
        Name of the storage method called to create the output.
    **kwargs
        Forwarded to the storage creation method.

    Returns
    -------
    out
        Whatever the storage creation method returned, or None when no
        input yielded a block.

    Raises
    ------
    ValueError
        If `tup` is not a tuple or list, or holds fewer than two items.
    """
    backend = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')

    # size hint for the output: the sum of len() over the inputs
    total_len = sum(map(len, tup))

    out = None
    col_names = None
    for table in tup:
        block_len = _util.get_blen_table(table, blen)
        col_names, cols = _util.check_table_like(table, names=col_names)
        n_rows = len(cols[0])
        for lo in range(0, n_rows, block_len):
            hi = min(lo + block_len, n_rows)
            block = [col[lo:hi] for col in cols]
            if out is not None:
                out.append(block)
                continue
            factory = getattr(backend, create)
            out = factory(block, names=col_names, expectedlen=total_len,
                          **kwargs)
    return out
Example #4
0
def compress_table(tbl, condition, blen=None, storage=None, create='table',
                   **kwargs):
    """Keep only the rows of `tbl` for which `condition` is true.

    Work proceeds block by block. A block's columns are read only if its
    slice of `condition` contains at least one true value.

    Parameters
    ----------
    tbl : table-like
        Input table; ``_util.check_table_like`` extracts names and columns.
    condition : array-like
        Selector that must match the first column in length.
    blen : int, optional
        Rows per block, as resolved by ``_util.get_blen_table``.
    storage : optional
        Passed to ``_util.get_storage`` to obtain the storage object.
    create : str, optional
        Attribute of the storage object called to create the output.
    **kwargs
        Extra arguments for the creation call.

    Returns
    -------
    out
        Result of the creation call with later blocks appended, or None
        if nothing was selected.
    """
    store = _util.get_storage(storage)
    header, data_cols = _util.check_table_like(tbl)
    step = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(data_cols[0], condition)
    total = len(data_cols[0])
    expected = count_nonzero(condition)

    out = None
    for begin in range(0, total, step):
        end = min(begin + step, total)
        flags = np.asanyarray(condition[begin:end])
        if np.any(flags):
            # only now touch the column data for this block
            chunk = [np.asanyarray(c[begin:end]) for c in data_cols]
            kept = [np.compress(flags, c, axis=0) for c in chunk]
            if out is not None:
                out.append(kept)
            else:
                make = getattr(store, create)
                out = make(kept, names=header, expectedlen=expected,
                           **kwargs)
    return out
Example #5
0
    def table(self, data, names=None, expectedlen=None, **kwargs):
        """Store each column of table-like `data` as a dataset in a group.

        Parameters
        ----------
        data : table-like
            Validated by ``_util.check_table_like``.
        names : optional
            Column names, passed to ``_util.check_table_like``.
        expectedlen : optional
            Forwarded to ``self.create_dataset`` for every column.
        **kwargs
            May contain ``group``, the group to write into. Without it the
            remaining arguments go to ``self.open_file``, which supplies
            both the group and the arguments left for dataset creation.

        Returns
        -------
        The group, with ``append`` and ``names`` attributes attached.
        """
        col_names, cols = _util.check_table_like(data, names=names)

        # use the caller's group if given, else obtain one from open_file
        group = kwargs.pop('group', None)
        if group is None:
            group, kwargs = self.open_file(**kwargs)

        # one dataset per column
        for col_name, col in zip(col_names, cols):
            self.create_dataset(group, data=col, name=col_name,
                                expectedlen=expectedlen, **kwargs)

        # attach the table-style append method and the column names
        group.append = MethodType(_table_append, group)
        group.names = col_names

        return group
Example #6
0
def copy_table(tbl,
               start=0,
               stop=None,
               blen=None,
               storage=None,
               create='table',
               **kwargs):
    """Copy the row range ``start:stop`` of `tbl` into a new table by blocks.

    Parameters
    ----------
    tbl : table-like
        Input table; ``_util.check_table_like`` extracts names and columns.
    start : int, optional
        First row to copy.
    stop : int, optional
        One past the last row to copy. If omitted, the length of the first
        column is used; a larger value is clipped to that length.
    blen : int, optional
        Rows per block, as resolved by ``_util.get_blen_table``.
    storage : optional
        Passed to ``_util.get_storage`` to obtain the storage object.
    create : str, optional
        Attribute of the storage object called to create the output.
    **kwargs
        Extra arguments for the creation call.

    Returns
    -------
    out
        Result of the creation call with later blocks appended, or None
        if the range contains no rows.

    Raises
    ------
    ValueError
        If `stop`, after clipping, is below `start`.
    """
    header, data_cols = _util.check_table_like(tbl)
    store = _util.get_storage(storage)
    step = _util.get_blen_table(tbl, blen)

    available = len(data_cols[0])
    if stop is not None:
        stop = min(stop, available)
    else:
        stop = available
    n_copy = stop - start
    if n_copy < 0:
        raise ValueError('invalid stop/start')

    out = None
    for begin in range(start, stop, step):
        end = min(begin + step, stop)
        chunk = [c[begin:end] for c in data_cols]
        if out is not None:
            out.append(chunk)
        else:
            # the first block creates the output container
            make = getattr(store, create)
            out = make(chunk, names=header, expectedlen=n_copy, **kwargs)

    return out
Example #7
0
 def __init__(self, data, names=None):
     """Wrap table-like `data`, recording its column names and columns.

     `data` is validated by ``_util.check_table_like``. The resulting
     names are also used as the fields of the ``row`` namedtuple class
     stored in ``self.rowcls``.
     """
     checked_names, cols = _util.check_table_like(data, names=names)
     # the super-class constructor is deliberately not called, because
     # this class is more flexible about the type of values it accepts
     self._values = data
     self._names = checked_names
     self._columns = cols
     row_type = namedtuple('row', checked_names)
     self.rowcls = row_type
Example #8
0
def vstack_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Stack the rows of two or more tables into a single new table.

    Parameters
    ----------
    tup : tuple or list
        Table-like inputs, stacked in order. Each is validated by
        ``_util.check_table_like`` against the names of the previous one.
    blen : int, optional
        Rows per block, resolved per input by ``_util.get_blen_table``.
    storage : optional
        Passed to ``_util.get_storage`` to obtain the storage object.
    create : str, optional
        Attribute of the storage object called to create the output.
    **kwargs
        Extra arguments for the creation call.

    Returns
    -------
    out
        Result of the creation call with later blocks appended, or None
        if no input produced a block.

    Raises
    ------
    ValueError
        If `tup` is not a tuple or list, or has fewer than two entries.
    """
    store = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')

    # size hint: the sum of len() over the inputs
    size_hint = sum(map(len, tup))

    out = None
    header = None
    for table in tup:
        step = _util.get_blen_table(table, blen)
        header, data_cols = _util.check_table_like(table, names=header)
        n_rows = len(data_cols[0])
        for begin in range(0, n_rows, step):
            end = min(begin + step, n_rows)
            chunk = [np.asanyarray(c[begin:end]) for c in data_cols]
            if out is not None:
                out.append(chunk)
            else:
                make = getattr(store, create)
                out = make(chunk, names=header, expectedlen=size_hint,
                           **kwargs)
    return out
Example #9
0
    def table(self, data, names=None, expectedlen=None, **kwargs):
        """Create a zarr group holding one array per column of `data`.

        Parameters
        ----------
        data : table-like
            Validated by ``_util.check_table_like``.
        names : optional
            Column names, passed to ``_util.check_table_like``.
        expectedlen : optional
            Passed to ``default_chunks`` when no explicit chunks are given.
        **kwargs
            Merged with defaults by ``self._set_defaults``. ``chunks`` is
            removed and applied to every column array; the rest goes to
            ``zarr.group``.

        Returns
        -------
        ZarrTable
            Wrapper around the created group.
        """

        # setup
        names, columns = _util.check_table_like(data, names=names)
        kwargs = self._set_defaults(kwargs)
        chunks = kwargs.pop('chunks', None)
        g = zarr.group(**kwargs)

        # create columns
        for n, c in zip(names, columns):
            # Resolve chunks per column. Assigning back to `chunks` would
            # make the first column's default apply to every later column,
            # whatever its shape or dtype.
            if chunks is None:
                col_chunks = default_chunks(c, expectedlen)
            else:
                col_chunks = chunks
            if c.dtype == object:
                # peek at first value to choose a codec for object data
                peek = c[0]
                if isinstance(peek, bytes):
                    object_codec = numcodecs.VLenBytes()
                elif isinstance(peek, str):
                    object_codec = numcodecs.VLenUTF8()
                else:
                    object_codec = numcodecs.MsgPack()
            else:
                object_codec = None
            g.array(name=n, data=c, chunks=col_chunks,
                    object_codec=object_codec)

        # create table
        ztbl = ZarrTable(g, names=names)
        return ztbl
Example #10
0
 def table(self, data, names=None, expectedlen=None, **kwargs):
     """Build a ``bcolz.ctable`` from table-like `data`.

     `data` is validated by ``_util.check_table_like`` and `kwargs` are
     merged with defaults by ``self._set_defaults`` before the ctable is
     created. The ctable's own ``append`` is kept as ``append_original``
     and ``append`` is rebound to ``_table_append``.
     """
     col_names, cols = _util.check_table_like(data, names=names)
     options = self._set_defaults(kwargs)
     result = bcolz.ctable(cols, names=col_names, expectedlen=expectedlen,
                           **options)
     # keep the native append reachable before replacing it
     native_append = result.append
     result.append_original = native_append
     result.append = MethodType(_table_append, result)
     return result
Example #11
0
 def table(self, data, names=None, expectedlen=None, **kwargs):
     """Create a ``bcolz.ctable`` holding the columns of `data`.

     Names and columns come from ``_util.check_table_like``; keyword
     arguments pass through ``self._set_defaults``. The returned ctable
     has its ``append`` replaced by ``_table_append``, with the original
     method saved as ``append_original``.
     """
     header, data_cols = _util.check_table_like(data, names=names)
     ctable_kwargs = self._set_defaults(kwargs)
     ctable_kwargs['names'] = header
     ctable_kwargs['expectedlen'] = expectedlen
     table_obj = bcolz.ctable(data_cols, **ctable_kwargs)
     # save the original append, then install the table-aware one
     table_obj.append_original = table_obj.append
     patched = MethodType(_table_append, table_obj)
     table_obj.append = patched
     return table_obj
Example #12
0
def eval_table(tbl,
               expression,
               vm='python',
               blen=None,
               storage=None,
               create='array',
               vm_kwargs=None,
               **kwargs):
    """Evaluate `expression` block-wise over the columns of `tbl`.

    Parameters
    ----------
    tbl : table-like
        Input table; ``_util.check_table_like`` extracts names and columns.
    expression : str
        Expression whose variables are column names.
    vm : {'python', 'numexpr'}, optional
        Evaluator: Python's ``eval`` or ``numexpr.evaluate``.
    blen : int, optional
        Rows per block, resolved by ``_util.get_blen_table`` from the
        columns that the expression uses.
    storage : optional
        Passed to ``_util.get_storage`` to obtain the storage object.
    create : str, optional
        Attribute of the storage object called to create the output.
    vm_kwargs : dict, optional
        Extra keyword arguments for the evaluator. The Python evaluator
        accepts and ignores them.
    **kwargs
        Extra arguments for the creation call.

    Returns
    -------
    out
        Result of the creation call with later blocks appended, or None
        if the table has no rows.

    Raises
    ------
    ValueError
        If `vm` is neither ``'numexpr'`` nor ``'python'``.
    """
    store = _util.get_storage(storage)
    header, data_cols = _util.check_table_like(tbl)
    n_rows = len(data_cols[0])
    if vm_kwargs is None:
        vm_kwargs = {}

    # choose the evaluator
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # NOTE(review): eval() executes arbitrary Python; `expression`
        # must come from a trusted source.
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **kw):
            # eval() takes no keyword arguments, so **kw is dropped
            return eval(expr, {}, local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')

    # only the columns named in the expression are read
    variables = _get_expression_variables(expression, vm)
    used = {var: data_cols[header.index(var)] for var in variables}

    # block size is derived from the columns actually used
    step = _util.get_blen_table(used, blen=blen)

    out = None
    for begin in range(0, n_rows, step):
        end = min(begin + step, n_rows)
        scope = {var: col[begin:end] for var, col in used.items()}
        block_result = evaluate(expression, local_dict=scope, **vm_kwargs)
        if out is not None:
            out.append(block_result)
        else:
            make = getattr(store, create)
            out = make(block_result, expectedlen=n_rows, **kwargs)

    return out
Example #13
0
    def table(self, data, names=None, expectedlen=None, **kwargs):
        """Create a zarr group holding one array per column of `data`.

        Parameters
        ----------
        data : table-like
            Validated by ``_util.check_table_like``.
        names : optional
            Column names, passed to ``_util.check_table_like``.
        expectedlen : optional
            Passed to ``default_chunks`` when no explicit chunks are given.
        **kwargs
            Merged with defaults by ``self._set_defaults``. ``chunks`` is
            removed and applied to every column array; the rest goes to
            ``zarr.group``.

        Returns
        -------
        ZarrTable
            Wrapper around the created group.
        """

        # setup
        names, columns = _util.check_table_like(data, names=names)
        kwargs = self._set_defaults(kwargs)
        # 'chunks' is an array-level option, so take it out before the
        # remaining kwargs are handed to zarr.group()
        chunks = kwargs.pop('chunks', None)
        g = zarr.group(**kwargs)

        # create columns
        for n, c in zip(names, columns):
            # Resolve chunks per column. Assigning back to `chunks` would
            # make the first column's default apply to every later column.
            if chunks is None:
                col_chunks = default_chunks(c, expectedlen)
            else:
                col_chunks = chunks
            g.array(name=n, data=c, chunks=col_chunks)

        # create table
        ztbl = ZarrTable(g, names=names)
        return ztbl
Example #14
0
def eval_table(tbl, expression, vm='python', blen=None, storage=None,
               create='array', vm_kwargs=None, **kwargs):
    """Compute `expression` over table columns, one block of rows at a time.

    Parameters
    ----------
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    expression : str
        Expression to evaluate; its variables refer to column names.
    vm : {'python', 'numexpr'}, optional
        Which evaluator to use: ``eval`` or ``numexpr.evaluate``.
    blen : int, optional
        Block length, resolved by ``_util.get_blen_table`` from the
        columns the expression needs.
    storage : optional
        Storage specification, resolved by ``_util.get_storage``.
    create : str, optional
        Name of the storage method called to create the output.
    vm_kwargs : dict, optional
        Keyword arguments handed to the evaluator. The Python evaluator
        ignores them.
    **kwargs
        Forwarded to the storage creation method.

    Returns
    -------
    out
        Whatever the storage creation method returned, or None for a
        table with no rows.

    Raises
    ------
    ValueError
        If `vm` is not one of the two supported values.
    """
    backend = _util.get_storage(storage)
    col_names, cols = _util.check_table_like(tbl)
    n_rows = len(cols[0])
    vm_kwargs = dict() if vm_kwargs is None else vm_kwargs

    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # NOTE(review): eval() runs arbitrary code, so only trusted
        # expressions should reach this branch.
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **kw):
            # extra keyword arguments are accepted but not used
            empty_globals = dict()
            return eval(expr, empty_globals, local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')

    # map each variable in the expression to its column
    variables = _get_expression_variables(expression, vm)
    needed = {name: cols[col_names.index(name)] for name in variables}

    block_len = _util.get_blen_table(needed, blen=blen)

    out = None
    for lo in range(0, n_rows, block_len):
        hi = min(lo + block_len, n_rows)
        local_vars = {name: col[lo:hi] for name, col in needed.items()}
        value = evaluate(expression, local_dict=local_vars, **vm_kwargs)
        if out is not None:
            out.append(value)
            continue
        factory = getattr(backend, create)
        out = factory(value, expectedlen=n_rows, **kwargs)

    return out
Example #15
0
    def table(self, data, names=None, expectedlen=None, **kwargs):
        """Create one array per column of `data` and wrap them in a ZarrTable.

        `expectedlen` is accepted but not used. `kwargs` are merged with
        defaults by ``self._set_defaults``. When they contain a truthy
        ``path``, each column's array gets the column name joined onto
        that path as its own ``path``.
        """
        col_names, cols = _util.check_table_like(data, names=names)
        kwargs = self._set_defaults(kwargs)
        base_path = kwargs.get('path')

        arrays = []
        for col_name, col in zip(col_names, cols):
            # per-column copy so the shared kwargs are never modified
            options = kwargs.copy()
            if base_path:
                options['path'] = os.path.join(base_path, col_name)
            arrays.append(self._create_array(col, **options))

        return ZarrTable(col_names, arrays)
Example #16
0
def take_table(tbl, indices, blen=None, storage=None, create='table',
               **kwargs):
    """Return selected rows of a table.

    Implemented by building a boolean mask from `indices` and delegating
    to ``compress_table``, so rows always come back in table order. The
    indices must therefore address strictly increasing row positions.

    Parameters
    ----------
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    indices : array-like of int
        Rows to select. Negative values count from the end of the table.
    blen, storage, create, **kwargs
        Forwarded to ``compress_table``.

    Returns
    -------
    Whatever ``compress_table`` returns for the derived mask.

    Raises
    ------
    NotImplementedError
        If the addressed row positions are not strictly increasing.
    """

    # setup
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])

    # Check that the addressed rows are strictly increasing. Compare row
    # positions rather than raw values: [-1, 0] is increasing numerically
    # but addresses the last row before the first, and the mask-based
    # implementation would silently return those rows in the wrong order.
    indices = np.asanyarray(indices)
    positions = indices
    if indices.dtype.kind == 'i':
        wide = indices.astype(np.int64)
        positions = np.where(wide < 0, wide + length, wide)
    if np.any(positions[1:] <= positions[:-1]):
        raise NotImplementedError(
            'indices must be strictly increasing'
        )

    # implement via compress(); the mask is indexed with the original
    # values so numpy's own bounds checking still applies
    condition = np.zeros((length,), dtype=bool)
    condition[indices] = True
    return compress_table(tbl, condition, blen=blen, storage=storage,
                          create=create, **kwargs)
Example #17
0
def take_table(tbl, indices, blen=None, storage=None, create='table',
               **kwargs):
    """Return selected rows of a table.

    A boolean mask is built from `indices` and passed to
    ``compress_table``, which yields rows in table order. For that to
    match the requested order, the indices must address strictly
    increasing row positions.

    Parameters
    ----------
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    indices : array-like of int
        Rows to select. Negative values count from the end of the table.
    blen, storage, create, **kwargs
        Forwarded to ``compress_table``.

    Returns
    -------
    Whatever ``compress_table`` returns for the derived mask.

    Raises
    ------
    NotImplementedError
        If the addressed row positions are not strictly increasing.
    """

    # setup
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])

    # Check ordering on resolved row positions. Raw values are not
    # enough: a negative index followed by a non-negative one (e.g.
    # [-1, 0]) is numerically increasing yet selects rows out of order,
    # which the mask below cannot represent.
    indices = np.asanyarray(indices)
    positions = indices
    if indices.dtype.kind == 'i':
        wide = indices.astype(np.int64)
        positions = np.where(wide < 0, wide + length, wide)
    if np.any(positions[1:] <= positions[:-1]):
        raise NotImplementedError(
            'indices must be strictly increasing'
        )

    # implement via compress(); index the mask with the caller's values
    # so numpy's own bounds checking still applies
    condition = np.zeros((length,), dtype=bool)
    condition[indices] = True
    return compress_table(tbl, condition, blen=blen, storage=storage,
                          create=create, **kwargs)
Example #18
0
def compress_table(condition,
                   tbl,
                   axis=None,
                   out=None,
                   blen=None,
                   storage=None,
                   create='table',
                   **kwargs):
    """Build a new table from the rows of `tbl` flagged by `condition`.

    Parameters
    ----------
    condition : array-like
        Row selector; checked to have the same length as the first column.
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    axis : int, optional
        Only None or 0 is accepted.
    out : optional
        Present for numpy API compatibility only; must be None.
    blen : int, optional
        Block length, resolved by ``_util.get_blen_table``.
    storage : optional
        Storage specification, resolved by ``_util.get_storage``.
    create : str, optional
        Name of the storage method called to create the output.
    **kwargs
        Forwarded to the storage creation method.

    Returns
    -------
    Whatever the storage creation method returned, or None when no block
    contained a selected row.

    Raises
    ------
    NotImplementedError
        If `axis` is neither None nor 0, or if `out` is given.
    """
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # the argument exists only for numpy API compatibility
        raise NotImplementedError('out argument is not supported')

    backend = _util.get_storage(storage)
    col_names, cols = _util.check_table_like(tbl)
    block_len = _util.get_blen_table(tbl, blen)
    first_col = cols[0]
    _util.check_equal_length(first_col, condition)
    n_rows = len(first_col)
    n_selected = count_nonzero(condition)

    result = None
    for lo in range(0, n_rows, block_len):
        hi = min(lo + block_len, n_rows)
        mask = condition[lo:hi]
        if not np.any(mask):
            # nothing selected here, so skip reading the column data
            continue
        loaded = [col[lo:hi] for col in cols]
        picked = [np.compress(mask, part, axis=0) for part in loaded]
        if result is not None:
            result.append(picked)
            continue
        factory = getattr(backend, create)
        result = factory(picked, names=col_names, expectedlen=n_selected,
                         **kwargs)
    return result
Example #19
0
    def table(self, data, names=None, expectedlen=None, **kwargs):
        """Write each column of table-like `data` as a dataset in a group.

        `expectedlen` is accepted but currently ignored. If `kwargs` holds
        a ``group`` entry, that group is used. Otherwise the arguments go
        to ``self.open_file``, which returns the group together with the
        arguments left over for dataset creation.

        Returns the group, with ``append`` and ``names`` attached.
        """
        header, data_cols = _util.check_table_like(data, names=names)

        # use the caller's group if given, else obtain one from open_file
        target = kwargs.pop("group", None)
        if target is None:
            target, kwargs = self.open_file(**kwargs)

        # one dataset per column
        for col_name, col in zip(header, data_cols):
            self.create_dataset(target, data=col, name=col_name, **kwargs)

        # give the group a table-style append method and its column names
        bound_append = MethodType(_table_append, target)
        target.append = bound_append
        target.names = header

        return target
Example #20
0
def take_table(tbl,
               indices,
               axis=None,
               out=None,
               mode='raise',
               blen=None,
               storage=None,
               create='table',
               **kwargs):
    """Return selected rows of a table.

    A boolean mask is built from `indices` and passed to
    ``compress_table``, which yields rows in table order. The indices
    must therefore address strictly increasing row positions.

    Parameters
    ----------
    tbl : table-like
        Source table, validated by ``_util.check_table_like``.
    indices : array-like of int
        Rows to select. Negative values count from the end of the table.
    axis : int, optional
        Only None or 0 is accepted.
    out : optional
        Present for numpy API compatibility only; must be None.
    mode : str, optional
        Only None or ``'raise'`` is accepted.
    blen, storage, create, **kwargs
        Forwarded to ``compress_table``.

    Returns
    -------
    Whatever ``compress_table`` returns for the derived mask.

    Raises
    ------
    NotImplementedError
        For an unsupported `axis`, `out` or `mode`, or when the addressed
        row positions are not strictly increasing.
    """

    # setup
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    if mode is not None and mode != 'raise':
        raise NotImplementedError('only mode=raise is supported')
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])

    # Check ordering on resolved row positions. Raw values are not
    # enough: [-1, 0] is numerically increasing but addresses the last
    # row before the first, and the mask-based implementation would
    # silently return those rows in the wrong order.
    indices = np.asanyarray(indices)
    positions = indices
    if indices.dtype.kind == 'i':
        wide = indices.astype(np.int64)
        positions = np.where(wide < 0, wide + length, wide)
    if np.any(positions[1:] <= positions[:-1]):
        raise NotImplementedError('indices must be strictly increasing')

    # implement via compress(); index the mask with the caller's values
    # so numpy's own bounds checking still applies
    condition = np.zeros((length, ), dtype=bool)
    condition[indices] = True
    return compress_table(condition,
                          tbl,
                          blen=blen,
                          storage=storage,
                          create=create,
                          **kwargs)
Example #21
0
 def __init__(self, data, names=None):
     """Wrap table-like `data`, keeping its names and columns.

     `data` is validated by ``_util.check_table_like``. The resulting
     names are also used as the fields of the ``row`` namedtuple class
     stored in ``self.rowcls``.
     """
     checked_names, cols = _util.check_table_like(data, names=names)
     self.data = data
     self.names = checked_names
     self.columns = cols
     row_type = namedtuple('row', checked_names)
     self.rowcls = row_type
Example #22
0
 def append(self, data):
     """Append each column of table-like `data` to the matching column.

     `data` is validated by ``_util.check_table_like`` against
     ``self.names``. Its columns are then paired positionally with
     ``self.columns``.
     """
     _, incoming = _util.check_table_like(data, names=self.names)
     for existing, extra in zip(self.columns, incoming):
         existing.append(extra)
Example #23
0
def _table_append(h5g, data):
    """Append each column of table-like `data` to the same-named item of `h5g`.

    `data` is validated by ``_util.check_table_like`` against
    ``h5g.names``. Each column is passed to ``_dataset_append`` together
    with ``h5g[name]``.
    """
    col_names, cols = _util.check_table_like(data, names=h5g.names)
    for col_name, col in zip(col_names, cols):
        _dataset_append(h5g[col_name], col)
Example #24
0
def _table_append(h5g, data):
    """Extend every named item in `h5g` with the matching column of `data`.

    Names and columns come from ``_util.check_table_like``, checked
    against ``h5g.names``. The actual append is done per column by
    ``_dataset_append``.
    """
    header, data_cols = _util.check_table_like(data, names=h5g.names)
    for key, values in zip(header, data_cols):
        target = h5g[key]
        _dataset_append(target, values)
Example #25
0
 def __init__(self, data, names=None):
     """Store table-like `data` together with its names and columns.

     ``_util.check_table_like`` supplies the names and columns. A
     namedtuple class called ``row``, with one field per name, is kept
     in ``self.rowcls``.
     """
     header, data_cols = _util.check_table_like(data, names=names)
     self.data = data
     self.names = header
     self.columns = data_cols
     self.rowcls = namedtuple('row', header)
Example #26
0
 def append(self, data):
     """Append each column of table-like `data` to ``self.grp[name]``.

     `data` is validated by ``_util.check_table_like`` against
     ``self.names``. Each column is appended to the same-named member
     of ``self.grp``.
     """
     col_names, cols = _util.check_table_like(data, names=self.names)
     for col_name, col in zip(col_names, cols):
         member = self.grp[col_name]
         member.append(col)