Ejemplo n.º 1
0
    def fromsarray(cls, array: np.ndarray, dtype: Optional[Union[str, type, np.ndarray.dtype]] = None, headerpos: Optional[Union[Sequence[int], np.ndarray]] = None) -> Table:
        """Build a Table from a 2-D string array containing a ``#<::dtype::>`` header cell.

        The header cell anchors the layout: the cells to its right are column
        names, the cells below it are row names, and the rectangle below and to
        the right is the data matrix. Columns left of the header (from the
        header row down) and rows above it (from the header column on) are
        handed to ``StructuredArray.fromsarray`` as row / column index.

        Args:
            array: string array to parse.
            dtype: matrix data type; read from the header cell tag when missing.
            headerpos: (row, column) of the header cell; searched for within
                the first 100 rows and columns when missing.

        Raises:
            ValueError: when no header or more than one header is found, or
                when the header cell holds no type tag and ``dtype`` is missing.
        """
        tagre = re.compile('#<::([<>|]?[biufcmMOSUV]\\d*)::>')

        def _tagof(cell):
            # first type tag embedded in the cell, '' when there is none
            hits = tagre.findall(cell)
            return hits[0] if len(hits) > 0 else ''

        def _isdefault(names):
            # names that are exactly '[0]', '[1]', ... carry no information
            return np.all(names == smap(range(names.shape[0]), lambda x: f'[{x}]'))

        if missing(headerpos):
            tags = np.vectorize(_tagof)(array[:100,:100])
            found = np.c_[np.where(tags != '')]
            nfound = found.shape[0]
            if nfound >= 2:
                raise ValueError('string array has multiple headers')
            if nfound == 0:
                raise ValueError('string array has no header in the first 100 rows / cols')
            headerpos = found[0]
        hrow, hcol = headerpos

        if missing(dtype):
            dtype = _tagof(array[hrow,hcol])
            if dtype == '':
                raise ValueError('unknown array data type')

        ridx = None
        if hcol > 0:
            ridx = StructuredArray.fromsarray(array[hrow:,:hcol].T)
        cidx = None
        if hrow > 0:
            cidx = StructuredArray.fromsarray(array[:hrow,hcol:])

        rnam = array[hrow+1:,hcol]
        if _isdefault(rnam):
            rnam = None
        cnam = array[hrow,hcol+1:]
        if _isdefault(cnam):
            cnam = None

        body = array[hrow+1:,hcol+1:]
        return Table(body, dtype = dtype, rownames = rnam, colnames = cnam, rowindex = ridx, colindex = cidx)
Ejemplo n.º 2
0
        def _fmt(mtx, rnam, cnam, ridx, cidx):
            """Render a matrix with its names and indices as a list of text lines.

            ``strinkcols``, ``strinkrows`` and ``delimiter`` are read from the
            enclosing scope (not visible here). Tables wider / taller than those
            limits are abbreviated with ' ... ' / ' ... ... ' markers.

            Args:
                mtx: 2-D data matrix (``mtx.shape`` must unpack to rows, cols).
                rnam: row names, or missing to use '[0]', '[1]', ...
                cnam: column names, or missing to use '[0]', '[1]', ...
                ridx: row index with ``size``, ``names`` and item access, or missing.
                cidx: column index with the same interface, or missing.

            Returns:
                One string per output line, cells joined with ``delimiter``.
            """
            nr, nc = mtx.shape

            # fall back to positional placeholder names
            if missing(rnam): rnam = smap(range(nr), lambda x: f'[{x}]')
            if missing(cnam): cnam = smap(range(nc), lambda x: f'[{x}]')

            # _sln: stringify a sequence; when sr is true keep only x[:hd] and x[tl:]
            # with the placeholder rp in between
            _sln  = lambda x,sr,hd,tl,rp: (smap(x[:hd],str) + [rp] + smap(x[tl:],str)) if sr else smap(x, str)
            # abbreviate along columns: first 3 cells, ' ... ', last cell
            _scol = lambda x: _sln(x, nc > strinkcols, 3, -1, ' ... ')
            # abbreviate along rows: first 5 entries, '' (pairs with the marker row), last 3
            _srow = lambda x: _sln(x, nr > strinkrows, 5, -3, '')

            # header line of column names, then the data rows; tall tables keep
            # the first 5 and last 3 rows around a ' ... ... ' marker row
            slns = [_scol(cnam)] + \
                  ([_scol(ln) for ln in mtx] if nr <= strinkrows else
                  ([_scol(ln) for ln in mtx[:5]] + [_scol([' ... ... '] + [''] * (nc-1))] + [_scol(ln) for ln in mtx[-3:]]))
            # prepend the '#' corner cell and the row name of every data row
            slns = [['#'] + slns[0]] + [[n] + ln for n,ln in zip(_srow(rnam), slns[1:])]

            # NOTE(review): size is used below as the number of index fields
            # (leading columns / lines) -- confirm against StructuredArray.size
            nri = ridx.size if available(ridx) else 0
            nci = cidx.size if available(cidx) else 0

            # one extra line per column-index field, labelled '<field>', placed on top
            if nci > 0: slns = [[f'<{k}>'] + _scol(cidx[k]) for k in cidx.names] + slns
            if nri > 0:
                # one extra leading column per row-index field: blanks beside the
                # column-index lines, then the '<field>' label, then the values
                sidx = [[''] * nci + [f'<{k}>'] + _srow(ridx[k]) for k in ridx.names]
                # zip(*sidx) transposes the columns into per-line prefixes
                slns = [list(ix) + ln for ix,ln in zip(zip(*sidx), slns)]

            def _sfmt(lns, pos):
                """Right-align the cells in column slice ``pos`` to one shared width."""
                # width = longest cell in the slice (markers count as 0) plus one;
                # collapse presumably flattens the nested lists -- TODO confirm
                size = max(collapse(smap(lns, lambda x: smap(x[pos], lambda v: len(v) if v not in (' ... ', ' ... ... ') else 0)))) + 1
                # the ' ... ' column marker is left unpadded
                for ln in lns: ln[pos] = smap(ln[pos], lambda x: '{0:>{1}s}'.format(x, size) if x != ' ... ' else x)
                return lns

            # align the three column groups separately: row index, row names, data
            if nri > 0: slns = _sfmt(slns, slice(None,nri))
            slns = _sfmt(slns, slice(nri,nri+1))
            slns = _sfmt(slns, slice(nri+1,None))

            return smap(slns, lambda ln: paste(ln, sep = delimiter))
Ejemplo n.º 3
0
 def asMatrix(val: Iterable[Iterable],
              nrow: Optional[int] = None,
              ncol: Optional[int] = None,
              rownames: Optional[Iterable] = None,
              colnames: Optional[Iterable] = None) -> robj.Matrix:
     """Convert nested iterables (or a 2-D ndarray) into an R matrix.

     Args:
         val: matrix content; anything that is not already a 2-D ndarray is
             converted with ``np.asarray`` after applying ``ll`` to every row.
         nrow: number of rows handed to R's ``matrix``.
         ncol: number of columns handed to R's ``matrix``.
         rownames: optional row names, converted to strings.
         colnames: optional column names, converted to strings.

     Returns:
         The matrix object created by ``robj.r.matrix``.
     """
     is2d = isinstance(val, np.ndarray) and val.ndim == 2
     if not is2d:
         val = np.asarray(smap(val, ll))

     # the shape is only taken from the data when neither dimension is given
     if missing(nrow) and missing(ncol):
         nrow, ncol = val.shape
     rmat = robj.r.matrix(val, nrow=nrow, ncol=ncol)

     def _strvec(names):
         # R string vector from any iterable of labels
         return robj.StrVector(np.asarray(ll(names), dtype=str))

     if available(rownames):
         rmat.rownames = _strvec(rownames)
     if available(colnames):
         rmat.colnames = _strvec(colnames)
     return rmat
Ejemplo n.º 4
0
 def execute(self, query: str) -> SQLiteWrapper:
     """Run one SQL statement on the open connection and return ``self``.

     A failing statement does not propagate: the error is only logged as a
     warning.

     Raises:
         IOError: if there is no open database connection.
     """
     conn = self._dbconn
     if missing(conn):
         raise IOError('database not connected')
     try:
         conn.execute(query)
     except Exception as err:
         logging.warning('sqlite execution failed: %s', str(err))
     return self
Ejemplo n.º 5
0
 def apply(func: str, *args: Any, **kwargs: Any) -> Any:
     """Call the R function named ``func`` and return its result.

     Keyword arguments that are missing are replaced with ``robj.NULL``;
     positional arguments are passed through ``pickmap`` with ``missing`` and
     ``robj.NULL`` (presumably the same substitution -- verify against pickmap).
     """
     rargs = pickmap(args, missing, robj.NULL)
     rkwargs = {}
     for key, val in kwargs.items():
         rkwargs[key] = robj.NULL if missing(val) else val
     rfunc = getattr(robj.r, func)
     return rfunc(*rargs, **rkwargs)
Ejemplo n.º 6
0
    def put(self,
            pos: Indices2D,
            value: Any,
            axis: Optional[int] = 0,
            inline: bool = False) -> StructuredArray:
        """Store ``value`` either as a whole field or into selected positions.

        Args:
            pos: a field name (sets / replaces that whole field) or indices
                resolved through ``_parseids``.
            value: data to store, normalised by ``_parsevals``.
            axis: forwarded to ``_parseids`` for non-name positions.
            inline: write into this object instead of a copy.

        Returns:
            The array that was written to (``self`` when ``inline`` is true).

        Raises:
            ValueError: if a whole-field value is not an ndarray or has the
                wrong length, or if a list of values does not match the number
                of selected fields.
        """
        target = self if inline else self.copy()
        parsed = self._parsevals(value)

        if isstring(pos):
            # whole-field assignment: must be an ndarray of the common length
            if not isinstance(parsed, np.ndarray):
                raise ValueError('input array not in 1-dimensional')
            if missing(target._length):
                target._length = parsed.shape[0]
            elif target._length != parsed.shape[0]:
                raise ValueError('input array size not match')
            target._arrs[pos] = parsed.copy()
            return target

        sids, aids = self._parseids(pos, axis=axis)
        if isinstance(parsed, list):
            # one value set per selected field
            if len(sids) != len(parsed):
                raise ValueError('input names and values size not match')
            for name, fieldvals in zip(sids, parsed):
                target._arrs[name][aids] = fieldvals
        else:
            # the same value goes into every selected field
            for name in sids:
                target._arrs[name][aids] = parsed
        return target
Ejemplo n.º 7
0
    def _exec(self, pms):
        """Run the wrapped binary once and collect its exit code and output.

        Args:
            pms: a ``(params, stdin, timeout)`` tuple, packed into a single
                argument so the method can be mapped over by worker pools.
                ``stdin`` may be ``bytes`` or ``str`` (encoded as UTF-8).

        Returns:
            ``(rcode, rstrs)`` where ``rstrs`` holds stdout and stderr decoded
            as UTF-8 and stripped.

        Raises:
            RuntimeError: if the exit code is not one of ``self._ncode``; a
                timeout is reported as exit code 124.
        """
        params, stdin, timeout = pms  # for multiproc

        exlst = [self._bin] + ([] if missing(params) else smap(
            params, lambda x: str(x).strip()))
        if self._shell:
            exlst = paste(smap(exlst, lambda x: x.replace(' ', r'\ ')),
                          sep=' ')

        # the pipes are opened in binary mode, so text input has to be encoded
        if isinstance(stdin, str):
            stdin = stdin.encode('utf-8')

        procs = Popen(exlst,
                      stdin=PIPE,
                      stdout=PIPE,
                      stderr=PIPE,
                      shell=self._shell)
        try:
            rvals = procs.communicate(input=stdin, timeout=timeout)
            rstrs = smap(
                rvals, lambda x: ''
                if x is None else x.decode('utf-8').strip())
            rcode = procs.returncode
        except TimeoutExpired:
            procs.kill()
            # finish communication so the killed child is reaped and its pipes
            # are closed instead of lingering as a zombie process
            procs.communicate()
            rstrs = ['subprocess terminated as timeout expired', '']
            rcode = 124

        prstr = paste(rstrs, sep=' | ')
        if rcode in self._ncode:
            logging.log((logging.DEBUG if self._mute else logging.INFO), prstr)
        else:
            raise RuntimeError(f'execution failed [{rcode}]:\n{prstr}')
        return rcode, rstrs
Ejemplo n.º 8
0
    def delete(self,
               pos: Indices2D,
               axis: Optional[int] = 0,
               inline: bool = False) -> StructuredArray:
        """Remove whole fields or whole element positions from the array.

        Args:
            pos: a field name, or indices resolved through ``_parseids``.
            axis: forwarded to ``_parseids`` for non-name positions.
            inline: modify this object instead of a copy.

        Returns:
            The modified array (``self`` when ``inline`` is true).

        Raises:
            IndexError: if ``pos`` selects only part of some fields, i.e.
                neither complete fields nor complete element positions.
        """
        narr = self if inline else self.copy()
        if isstring(pos):
            del narr._arrs[pos]
            return narr

        sids, aids = self._parseids(pos, axis=axis, mapslice=False)
        # slic / alic: the selection covers every field / every element
        slic = isinstance(sids, slice) and sids == slice(None)
        alic = isinstance(aids,
                          slice) and aids == slice(None) and (missing(axis)
                                                              or axis == 0)

        if slic and alic:
            # everything selected: reset to an empty array
            narr._arrs = OrderedDict()
            narr._length = None
        elif slic and not alic:
            if listable(aids) and len(aids) == 1 and aids[0] < 0:
                aids = aids[
                    0]  # fix the issue that currently negative indices are ignored by np.delete
            for k, v in narr._arrs.items():
                narr._arrs[k] = np.delete(v, aids)
            narr._length = len(narr._arrs[l(narr._arrs.keys())[0]])
        elif not slic and alic:
            # _parseids can hand back a plain list of names, which has no
            # .dtype attribute; np.asarray gives a uniform way to inspect it
            if isinstance(sids, slice) or np.asarray(sids).dtype.kind not in ('S', 'U'):
                sids = narr.names[sids]
            for k in sids:
                del narr._arrs[k]
        else:
            raise IndexError('unable to delete portion of the array')

        return narr
Ejemplo n.º 9
0
    def __init__(self,
                 items: Optional[Union[Iterable, Mapping, np.ndarray,
                                       StructuredArray]] = None,
                 **kwargs: Iterable):
        """Create the array from another array, a mapping, pairs or keywords.

        Args:
            items: a StructuredArray (its field dictionary is shallow-copied),
                a NumPy array with named fields, a mapping, or an iterable of
                ``(name, values)`` pairs.
            **kwargs: fields given as keywords; only used when ``items`` is
                missing.

        Raises:
            TypeError: if ``items`` is none of the supported kinds.
        """
        if isinstance(items, StructuredArray):
            self._arrs = items._arrs.copy()
            self._length = items._length
            return

        if isinstance(items, np.ndarray) and available(items.dtype.names):
            pairs = [(name, items[name]) for name in items.dtype.names]
        elif ismapping(items):
            pairs = items.items()
        elif iterable(items):
            pairs = items
        elif missing(items):
            pairs = kwargs.items()
        else:
            pairs = None
        if missing(pairs):
            raise TypeError('unknow data type')

        self._arrs = OrderedDict()
        self._length = None
        # item assignment runs the regular per-field handling
        for name, values in pairs:
            self[name] = values
Ejemplo n.º 10
0
 def close(self, commit: bool = True) -> SQLiteWrapper:
     """Close the database connection, optionally committing first.

     Closing an already closed wrapper only logs a warning.

     Args:
         commit: commit pending changes before closing.

     Returns:
         ``self``.
     """
     if missing(self._dbconn):
         logging.warning(
             'connection to database [%s] already closed, ignore',
             fileTitle(self._dbfile))
         return self

     if commit:
         self._dbconn.commit()
     self._dbconn.close()
     self._dbconn = None
     return self
Ejemplo n.º 11
0
 def insert(self,
            pos: Indices,
            value: Union[str, Iterable[str]],
            inline: bool = False) -> NamedIndex:
     """Insert one or more names before position ``pos``.

     Args:
         pos: insertion position, resolved by ``_parseids``; a missing
             position delegates to ``append``.
         value: name(s) to insert, normalised by ``_parsevals``.
         inline: modify this index instead of a copy.

     Returns:
         The modified index (``self`` when ``inline`` is true).
     """
     if missing(pos):
         return self.append(value, inline)

     where = self._parseids(pos)
     newnames = self._parsevals(value)
     target = self if inline else self.copy()
     target._names = np.insert(target._names, where, newnames)
     target._reindex()
     return target
Ejemplo n.º 12
0
 def __init__(self,
              binary: Union[str, Path],
              shell: bool = False,
              normcodes: Union[int, Iterable[int]] = 0,
              mute: bool = False):
     """Wrap an executable located through ``self.which``.

     Args:
         binary: name or path of the executable.
         shell: stored as ``_shell``.
         normcodes: exit code(s) stored as ``_ncode``; a single int is
             wrapped in a tuple.
         mute: stored as ``_mute``.

     Raises:
         RuntimeError: if ``self.which`` cannot resolve ``binary``.
     """
     self._bin = self.which(binary)
     if missing(self._bin):
         raise RuntimeError(f'binary executable [{binary}] not reachable')

     self._shell = shell
     if isinstance(normcodes, int):
         self._ncode = (normcodes, )
     else:
         self._ncode = ll(normcodes)
     self._mute = mute
Ejemplo n.º 13
0
    def mapexec(self,
                params: Optional[Iterable[Sequence]] = None,
                stdin: Optional[Iterable[Union[bytes, str]]] = None,
                timeout: Optional[int] = None,
                nthreads: Optional[int] = None,
                nprocs: Optional[int] = None) -> List[Tuple[int, List[str]]]:
        """Run the binary once per job and collect the results of ``_exec``.

        Args:
            params: one parameter sequence per job, or missing.
            stdin: one stdin payload per job, or missing.
            timeout: timeout passed to every job.
            nthreads: when given (and ``nprocs`` is not), jobs are mapped with
                ``tmap``.
            nprocs: when given, jobs are mapped with ``pmap``; takes
                precedence over ``nthreads``.

        Returns:
            One ``_exec`` result per job.

        Raises:
            RuntimeError: if both ``params`` and ``stdin`` are missing, or if
                they have different lengths.
        """
        if available(params): params = ll(params)
        if available(stdin): stdin = ll(stdin)
        if available(params) and available(
                stdin) and len(params) != len(stdin):
            raise RuntimeError('parameters and stdins size not match')
        if missing(params) and missing(stdin):
            raise RuntimeError('both parameters and stdins are missing')

        # either list may be absent, so count the jobs on whichever is present
        n = len(params) if available(params) else len(stdin)
        mpms = [(p, s, timeout)
                for p, s in zip(optional(params, [None] *
                                         n), optional(stdin, [None] * n))]
        _map = partial(pmap, nprocs = nprocs) if available(nprocs) else \
               partial(tmap, nthreads = nthreads) if available(nthreads) else smap
        return _map(mpms, self._exec)
Ejemplo n.º 14
0
 def idsof(self,
           names: Union[str, Iterable[str]],
           safe: bool = False) -> Union[None, int, List[Union[None, int]]]:
     """Look up the position(s) stored in ``_nidct`` for one or several names.

     Args:
         names: a single name, or an iterable whose string items are looked
             up while non-string items are passed through unchanged.
         safe: when true, unknown names yield ``None`` instead of raising.

     Returns:
         A single lookup result for a string input, otherwise a list.

     Raises:
         KeyError: if a name is unknown and ``safe`` is false.
     """
     if isstring(names):
         found = self._nidct.get(names, None)
         if not safe and missing(found):
             raise KeyError(f'unknown index name {names}')
         return found

     found = []
     for item in names:
         found.append(self._nidct.get(item, None) if isstring(item) else item)
     if not safe and checkany(found, missing):
         raise KeyError('unknown index name(s)')
     return found
Ejemplo n.º 15
0
 def asVector(val: Iterable,
              names: Optional[Iterable] = None) -> robj.Vector:
     """Convert an iterable into the R vector type matching its NumPy dtype kind.

     Args:
         val: values to convert; turned into an ndarray first.
         names: optional element names, converted to strings.

     Raises:
         TypeError: if the dtype kind is not integer, unsigned, float, bool
             or string.
     """
     val = np.asarray(ll(val))

     # dtype kind -> R vector constructor
     builders = {
         'i': robj.IntVector,
         'u': robj.IntVector,
         'f': robj.FloatVector,
         'b': robj.BoolVector,
         'S': robj.StrVector,
         'U': robj.StrVector,
     }
     builder = builders.get(val.dtype.kind)
     vect = None if builder is None else builder(val)
     if missing(vect):
         raise TypeError(f'unknown vector type [{val.dtype.kind}]')

     if available(names):
         vect.names = robj.StrVector(np.asarray(ll(names), dtype=str))
     return vect
Ejemplo n.º 16
0
    def _parseids(self, idx, axis = None, mapslice = True):
        """Split an index expression into resolved row and column indices.

        Args:
            idx: a single index (rows when ``axis`` is missing) or a
                ``(rows, cols)`` tuple; tuples are rejected when ``axis`` is given.
            axis: 0 to apply ``idx`` to rows, 1 to columns, missing to
                interpret ``idx`` itself.
            mapslice: when true, ``None`` and slices are expanded into integer
                position arrays; otherwise they are returned as slices.

        Returns:
            ``(rids, cids)``.

        Raises:
            IndexError: for a tuple index combined with ``axis``, or an axis
                other than 0 / 1.
        """
        if missing(axis):
            if not isinstance(idx, tuple):
                rids, cids = idx, slice(None)
            elif len(idx) == 1:
                rids, cids = idx[0], slice(None)
            else:
                rids, cids = idx
        else:
            if isinstance(idx, tuple):
                raise IndexError('too many dimensions for array')
            if axis not in (0, 1):
                raise IndexError('invalid axis value')
            if axis == 0:
                rids, cids = idx, slice(None)
            else:
                rids, cids = slice(None), idx

        def _resolve(ids, num, names):
            # None / slice select by position; anything else goes through the names
            if ids is None:
                return np.arange(num) if mapslice else slice(None)
            if isinstance(ids, slice):
                return np.arange(num)[ids] if mapslice else ids
            return self._mapids(ids, names)

        rids = _resolve(rids, self.nrow, self._rnames)
        cids = _resolve(cids, self.ncol, self._cnames)
        return rids, cids
Ejemplo n.º 17
0
    def saverdata(self, fname, *, dataobj: str = 'data.matrix',
                  ridxobj: Optional[str] = 'row.index', cidxobj: Optional[str] = 'col.index', transpose: bool = True) -> bool:
        """Save the table into an RData file through the R wrapper ``rw``.

        Args:
            fname: output file.
            dataobj: R object name for the data matrix.
            ridxobj: R object name for the row index data frame.
            cidxobj: R object name for the column index data frame.
            transpose: when true, the matrix is stored transposed, with the row
                and column names / indices swapped accordingly.

        Returns:
            Whether ``fname`` exists after saving.

        Raises:
            RuntimeError: if ``rw`` is not available.
        """
        if missing(rw):
            raise RuntimeError('RWrapper not available for this installation')
        checkOutputFile(fname)

        if transpose:
            dm, rn, cn, ri, ci = self._dmatx.T, self._cnames, self._rnames, self._cindex, self._rindex
        else:
            dm, rn, cn, ri, ci = self._dmatx, self._rnames, self._cnames, self._rindex, self._cindex

        rw.assign(rw.asMatrix(dm, rownames = rn, colnames = cn), dataobj)

        # every available index becomes a data.frame with one column per field
        saved = [dataobj]
        for index, objname in ((ri, ridxobj), (ci, cidxobj)):
            if available(index):
                rw.assign(rw.r['data.frame'](**{k: rw.asVector(v) for k,v in index.fields}), objname)
                saved.append(objname)

        rw.run(f'save({paste(saved, sep = ",")}, file = "{fname}")') # avoid bug in rw.save
        return os.path.isfile(fname)
Ejemplo n.º 18
0
    def loadrdata(cls, fname: Union[str, Path], dataobj: str, *,
                  ridxobj: Optional[str] = None, cidxobj: Optional[str] = None, transposed: bool = True) -> Table:
        """Load a Table from an RData file through the R wrapper ``rw``.

        Args:
            fname: RData file to load.
            dataobj: name of the R object holding the data matrix.
            ridxobj: name of the R object holding the row index, if any.
            cidxobj: name of the R object holding the column index, if any.
            transposed: when true, the loaded matrix is transposed and the row
                and column names / indices are swapped.

        Raises:
            RuntimeError: if ``rw`` is not available.
        """
        if missing(rw):
            raise RuntimeError('RWrapper not available for this installation')
        checkInputFile(fname)
        rw.r.load(fname)

        # names are fetched through R because the numpy conversion drops them
        dm = np.array(rw.r[dataobj])
        rn = rw.run(f'rownames({dataobj})')
        cn = rw.run(f'colnames({dataobj})')
        rn = None if rn is rw.null else np.array(rn)
        cn = None if cn is rw.null else np.array(cn)

        def _readidx(objname):
            # pair every field name with the transposed records of the R object
            frame = rw.r[objname]
            return zip(frame.dtype.names, zip(*frame))

        ri = _readidx(ridxobj) if available(ridxobj) else None
        ci = _readidx(cidxobj) if available(cidxobj) else None

        if transposed:
            dm, rn, cn, ri, ci = dm.T, cn, rn, ci, ri
        return Table(dm, rownames = rn, colnames = cn, rowindex = ri, colindex = ci)
Ejemplo n.º 19
0
    def _parseids(self, idx, axis=None, mapslice=True):
        """Split an index expression into field ids and element ids.

        Args:
            idx: a single index (fields when ``axis`` is missing) or a
                ``(fields, elements)`` tuple; tuples are rejected when ``axis``
                is given.
            axis: 0 to apply ``idx`` to fields, 1 to elements, missing to
                interpret ``idx`` itself.
            mapslice: when true, a field slice is translated into field names.

        Returns:
            ``(sids, aids)``; ``None`` becomes ``slice(None)`` and scalars are
            wrapped in a list. Field ids containing non-string items are
            translated through ``self.names``.

        Raises:
            IndexError: for a tuple index combined with ``axis``, or an axis
                other than 0 / 1.
        """
        if missing(axis):
            if not isinstance(idx, tuple):
                sids, aids = idx, slice(None)
            elif len(idx) == 1:
                sids, aids = idx[0], slice(None)
            else:
                sids, aids = idx
        else:
            if isinstance(idx, tuple):
                raise IndexError('too many dimensions for array')
            if axis not in (0, 1):
                raise IndexError('invalid axis value')
            if axis == 0:
                sids, aids = idx, slice(None)
            else:
                sids, aids = slice(None), idx

        def _normalise(ids):
            # None -> full slice, scalar -> one-item list, the rest untouched
            if ids is None:
                return slice(None)
            if isinstance(ids, slice) or listable(ids):
                return ids
            return [ids]

        sids, aids = smap((sids, aids), _normalise)

        if isinstance(sids, slice) and mapslice:
            sids = self.names[sids]
        elif listable(sids) and checkany(sids, lambda x: not isstring(x)):
            sids = self.names[sids]
        return sids, aids
Ejemplo n.º 20
0
 def __array_wrap__(self, arr, context=None, return_scalar=False):
     """Wrap a NumPy result back into a StructuredArray.

     Args:
         arr: result array; must have named fields.
         context: accepted for compatibility with NumPy's calling convention,
             which may pass the ufunc context; unused.
         return_scalar: accepted for compatibility with NumPy 2, which passes
             this flag; unused.

     Raises:
         TypeError: if ``arr`` has no named fields.
     """
     if missing(arr.dtype.names):
         raise TypeError(
             'cannot assign non-structured ndarray to StructuredArray')
     return StructuredArray(arr)
Ejemplo n.º 21
0
 def _mapids(ids, names):
     """Translate an index list that may contain names into positions.

     Args:
         ids: a NamedIndex, a scalar or a list-like of positions / names.
         names: the index used to resolve names via ``idsof``; may be missing
             when ``ids`` holds no strings.

     Returns:
         ``ids`` as a list-like, unchanged when it holds no strings, otherwise
         the result of ``names.idsof``.

     Raises:
         KeyError: if names are used but ``names`` is missing.
     """
     if isinstance(ids, NamedIndex):
         ids = np.array(ids)
     if not listable(ids):
         ids = [ids]

     hasnames = checkany(ids, isstring)
     if not hasnames:
         return ids
     if missing(names):
         raise KeyError('table names not set')
     return names.idsof(ids, safe = False)
Ejemplo n.º 22
0
    def insert(self, pos: Indices, value: Table, axis: int = 0, inline: bool = False) -> Table:
        """Insert the rows (``axis=0``) or columns (``axis=1``) of another table.

        All compatibility checks run before the data matrix is touched, so a
        rejected insert does not leave an inline table with a grown matrix and
        stale names.

        Args:
            pos: position (index or name) to insert before; missing appends.
            value: table whose rows / columns are inserted; its data is cast to
                this table's dtype.
            axis: 0 to insert rows, 1 to insert columns.
            inline: modify this table instead of a copy.

        Returns:
            The modified table (``self`` when ``inline`` is true).

        Raises:
            TypeError: if ``value`` is not a Table.
            IndexError: if the shapes, names or indices along the other axis
                differ, or ``axis`` is neither 0 nor 1.
        """
        if not isinstance(value, Table): raise TypeError('unknown input data type')

        ntab = self if inline else self.copy()
        if axis == 0:
            # validate first: nothing below may fail on a compatibility check
            if value.ncol != ntab.ncol: raise IndexError('input table has different number of columns')
            if available(ntab._cnames) and available(value._cnames) and np.any(value._cnames != ntab._cnames): raise IndexError('input table has different column names')
            if available(ntab._cindex) and available(value._cindex) and value._cindex != ntab._cindex: raise IndexError('input table has different column index')

            if available(pos): pos = self._mapids(pos, self._rnames)
            ntab._dmatx = np.vstack([ntab._dmatx, value._dmatx.astype(ntab.dtype)]) if missing(pos) else \
                          np.insert(ntab._dmatx, pos, value._dmatx.astype(ntab.dtype), axis = 0)
            if available(ntab._rnames): ntab._rnames.insert(pos, value._rnames, inline = True)
            if available(ntab._rindex): ntab._rindex.insert(pos, value._rindex, inline = True)
        elif axis == 1:
            if value.nrow != ntab.nrow: raise IndexError('input table has different number of rows')
            if available(ntab._rnames) and available(value._rnames) and np.any(value._rnames != ntab._rnames): raise IndexError('input table has different row names')
            if available(ntab._rindex) and available(value._rindex) and value._rindex != ntab._rindex: raise IndexError('input table has different row index')

            if available(pos): pos = self._mapids(pos, self._cnames)
            ntab._dmatx = np.hstack([ntab._dmatx, value._dmatx.astype(ntab.dtype)]) if missing(pos) else \
                          np.insert(ntab._dmatx, pos, value._dmatx.astype(ntab.dtype), axis = 1)
            if available(ntab._cnames): ntab._cnames.insert(pos, value._cnames, inline = True)
            if available(ntab._cindex): ntab._cindex.insert(pos, value._cindex, inline = True)
        else: raise IndexError(f'unsupported axis [{axis}]')

        return ntab
Ejemplo n.º 23
0
 def cidx_(self, value):
     """Set the column index; a missing value clears it.

     The new index is validated before it is stored, so a rejected value
     leaves the current column index in place.

     Raises:
         ValueError: if the index is non-empty and its length differs from
             the number of columns.
     """
     if missing(value):
         self._cindex = None
         return
     cindex = StructuredArray(value)
     if cindex.size != 0 and cindex.length != self.ncol:
         raise ValueError('input column index size not match')
     self._cindex = cindex
Ejemplo n.º 24
0
 def ridx_(self, value):
     """Set the row index; a missing value clears it.

     The new index is validated before it is stored, so a rejected value
     leaves the current row index in place.

     Raises:
         ValueError: if the index is non-empty and its length differs from
             the number of rows.
     """
     if missing(value):
         self._rindex = None
         return
     rindex = StructuredArray(value)
     if rindex.size != 0 and rindex.length != self.nrow:
         raise ValueError('input row index size not match')
     self._rindex = rindex
Ejemplo n.º 25
0
 def cols_(self, value):
     """Set the column names; a missing value clears them.

     The new names are validated before they are stored, so a rejected value
     leaves the current column names in place.

     Raises:
         ValueError: if the number of names differs from the number of columns.
     """
     if missing(value):
         self._cnames = None
         return
     cnames = NamedIndex(value)
     if cnames.size != self.ncol:
         raise ValueError('input column names size not match')
     self._cnames = cnames
Ejemplo n.º 26
0
 def rows_(self, value):
     """Set the row names; a missing value clears them.

     The new names are validated before they are stored, so a rejected value
     leaves the current row names in place.

     Raises:
         ValueError: if the number of names differs from the number of rows.
     """
     if missing(value):
         self._rnames = None
         return
     rnames = NamedIndex(value)
     if rnames.size != self.nrow:
         raise ValueError('input row names size not match')
     self._rnames = rnames
Ejemplo n.º 27
0
 def commit(self) -> SQLiteWrapper:
     """Commit pending changes on the open connection and return ``self``.

     Raises:
         IOError: if there is no open database connection.
     """
     conn = self._dbconn
     if missing(conn):
         raise IOError('database not connected')
     conn.commit()
     return self