Esempio n. 1
0
    def fromsarray(cls, array: np.ndarray, dtype: Optional[Union[str, type, np.ndarray.dtype]] = None, headerpos: Optional[Union[Sequence[int], np.ndarray]] = None) -> Table:
        """Rebuild a Table from its 2-D string-array representation.

        The array must contain one header cell of the form ``#<::dtype::>``.
        Cells to the right of it on the same row are column names, cells below
        it in the same column are row names, and the block to the lower right
        is the data matrix. Columns left of the header cell are parsed as the
        row index, rows above it as the column index.

        Args:
            array: string array to parse.
            dtype: data type of the matrix; when missing it is read from the
                header cell.
            headerpos: (row, column) of the header cell; when missing it is
                searched for in the first 100 rows and 100 columns.

        Raises:
            ValueError: if no header or more than one header is found, or if
                the data type cannot be read from the header cell.
        """
        header = re.compile('#<::([<>|]?[biufcmMOSUV]\\d*)::>')

        def _tag(cell):
            # dtype code embedded in a cell, or '' when the cell is not a header
            found = header.findall(cell)
            return found[0] if len(found) > 0 else ''

        def _default(size):
            # the placeholder names '[0]', '[1]', ... used for unnamed rows / cols
            return smap(range(size), lambda x: f'[{x}]')

        if missing(headerpos):
            tags = np.vectorize(_tag)(array[:100,:100])
            hits = np.c_[np.where(tags != '')]
            if hits.shape[0] >= 2:
                raise ValueError('string array has multiple headers')
            if hits.shape[0] == 0:
                raise ValueError('string array has no header in the first 100 rows / cols')
            headerpos = hits[0]
        rids, cids = headerpos

        if missing(dtype):
            dtype = _tag(array[rids,cids])
            if dtype == '':
                raise ValueError('unknown array data type')

        # index blocks only exist when the header is not on the first column / row
        if cids > 0:
            ridx = StructuredArray.fromsarray(array[rids:,:cids].T)
        else:
            ridx = None
        if rids > 0:
            cidx = StructuredArray.fromsarray(array[:rids,cids:])
        else:
            cidx = None

        # names equal to the placeholders are treated as "no names"
        rnam = array[rids+1:,cids]
        if np.all(rnam == _default(rnam.shape[0])):
            rnam = None
        cnam = array[rids,cids+1:]
        if np.all(cnam == _default(cnam.shape[0])):
            cnam = None

        dmtx = array[rids+1:,cids+1:]
        return Table(dmtx, dtype = dtype, rownames = rnam, colnames = cnam, rowindex = ridx, colindex = cidx)
Esempio n. 2
0
        def _fmt(mtx, rnam, cnam, ridx, cidx):
            """Format a matrix with its names and indices into a list of text lines.

            Missing row / column names are replaced by '[0]', '[1]', ... . When
            the matrix has more rows than ``strinkrows`` only the first 5 and
            last 3 rows are kept; when it has more columns than ``strinkcols``
            only the first 3 and the last column are kept, with ' ... ' in
            between. ``strinkrows``, ``strinkcols`` and ``delimiter`` are read
            from the enclosing scope (not visible here).

            Returns the lines as produced by ``paste`` with ``delimiter``.
            """
            nr, nc = mtx.shape

            if missing(rnam): rnam = smap(range(nr), lambda x: f'[{x}]')
            if missing(cnam): cnam = smap(range(nc), lambda x: f'[{x}]')

            # _sln: stringify x; when `sr` is true keep x[:hd] and x[tl:] with `rp` in between
            _sln  = lambda x,sr,hd,tl,rp: (smap(x[:hd],str) + [rp] + smap(x[tl:],str)) if sr else smap(x, str)
            # _scol shortens along the column axis, _srow along the row axis
            _scol = lambda x: _sln(x, nc > strinkcols, 3, -1, ' ... ')
            _srow = lambda x: _sln(x, nr > strinkrows, 5, -3, '')

            # first line holds column names, followed by the (possibly shortened) data rows;
            # a shortened table gets a ' ... ... ' placeholder row between head and tail
            slns = [_scol(cnam)] + \
                  ([_scol(ln) for ln in mtx] if nr <= strinkrows else
                  ([_scol(ln) for ln in mtx[:5]] + [_scol([' ... ... '] + [''] * (nc-1))] + [_scol(ln) for ln in mtx[-3:]]))
            # prepend the '#' corner cell and the row names
            slns = [['#'] + slns[0]] + [[n] + ln for n,ln in zip(_srow(rnam), slns[1:])]

            # number of row / column index entries (0 when the index is absent)
            nri = ridx.size if available(ridx) else 0
            nci = cidx.size if available(cidx) else 0

            # column index lines go on top, each labelled '<name>'
            if nci > 0: slns = [[f'<{k}>'] + _scol(cidx[k]) for k in cidx.names] + slns
            if nri > 0:
                # one column per row index, padded with '' above for the column index lines
                sidx = [[''] * nci + [f'<{k}>'] + _srow(ridx[k]) for k in ridx.names]
                slns = [list(ix) + ln for ix,ln in zip(zip(*sidx), slns)]

            def _sfmt(lns, pos):
                # right-align the cells in slice `pos` to a common width (longest cell + 1);
                # placeholder cells are ignored for the width and ' ... ' is left unpadded
                size = max(collapse(smap(lns, lambda x: smap(x[pos], lambda v: len(v) if v not in (' ... ', ' ... ... ') else 0)))) + 1
                for ln in lns: ln[pos] = smap(ln[pos], lambda x: '{0:>{1}s}'.format(x, size) if x != ' ... ' else x)
                return lns

            # align the row index block, the row name column and the data block separately
            if nri > 0: slns = _sfmt(slns, slice(None,nri))
            slns = _sfmt(slns, slice(nri,nri+1))
            slns = _sfmt(slns, slice(nri+1,None))

            return smap(slns, lambda ln: paste(ln, sep = delimiter))
Esempio n. 3
0
    def _exec(self, pms):
        """Run the wrapped binary once and collect its exit code and output.

        Args:
            pms: a ``(params, stdin, timeout)`` triple, packed into a single
                argument so the method can be mapped over by a worker pool.

        Returns:
            ``(rcode, rstrs)`` where ``rstrs`` holds the decoded and stripped
            stdout and stderr. On timeout ``rcode`` is 124 and ``rstrs``
            carries a fixed message.

        Raises:
            RuntimeError: if the exit code is not one of ``self._ncode``.
        """
        params, stdin, timeout = pms  # for multiproc

        exlst = [self._bin] + ([] if missing(params) else smap(
            params, lambda x: str(x).strip()))
        if self._shell:
            # NOTE(review): only spaces are escaped here; other shell
            # metacharacters in the arguments reach the shell unchanged.
            exlst = paste(smap(exlst, lambda x: x.replace(' ', r'\ ')),
                          sep=' ')

        procs = Popen(exlst,
                      stdin=PIPE,
                      stdout=PIPE,
                      stderr=PIPE,
                      shell=self._shell)
        try:
            rvals = procs.communicate(input=stdin, timeout=timeout)
        except TimeoutExpired:
            procs.kill()
            # communicate() does not reap the child on timeout; finish the
            # communication after the kill so the process is waited for and
            # its pipes are closed.
            procs.communicate()
            rstrs = ['subprocess terminated as timeout expired', '']
            rcode = 124
        else:
            rstrs = smap(
                rvals, lambda x: ''
                if x is None else x.decode('utf-8').strip())
            rcode = procs.returncode

        prstr = paste(rstrs, sep=' | ')
        if rcode in self._ncode:
            logging.log((logging.DEBUG if self._mute else logging.INFO), prstr)
        else:
            raise RuntimeError(f'execution failed [{rcode}]:\n{prstr}')
        return rcode, rstrs
Esempio n. 4
0
 def __str__(self):
     """Render every stored array as ``name : value`` text.

     Names are padded to the longest name; continuation lines of a
     multi-line value are indented to line up under the first one. An
     empty container renders as ``'[ ]'``.
     """
     if len(self._arrs) == 0: return '[ ]'

     width = max(smap(self._arrs.keys(), len))
     indent = ' ' * (width + 3)  # name column plus the ' : ' separator

     texts = smap(self._arrs.values(), str)
     rows = []
     for key, text in zip(self._arrs.keys(), texts):
         for i, part in enumerate(text.split('\n')):
             lead = f'{key:{width}s} : ' if i == 0 else indent
             rows.append(lead + part)
     return paste(rows, sep='\n')
Esempio n. 5
0
    def _parsevals(self, value):
        """Normalise a value that is about to be stored in this container.

        - a StructuredArray yields copies of its arrays, picked by the names
          of this instance;
        - a non-iterable value is returned unchanged;
        - an iterable whose first item is not iterable becomes one array;
        - an iterable of iterables becomes a list of arrays, which must all
          have the same length.

        Raises:
            ValueError: if the nested arrays differ in length.
        """
        if isinstance(value, StructuredArray):
            return [value._arrs[name].copy() for name in self._arrs.keys()]
        if not iterable(value):
            return value

        items = ll(value)
        if not iterable(items[0]):
            return np.asarray(items)

        arrays = smap(items, lambda x: np.asarray(ll(x)))
        sizes = set(smap(arrays, len))
        if len(sizes) != 1:
            raise ValueError('input arrays not in the same size')
        return arrays
Esempio n. 6
0
    def tosarray(self, withindex: bool = True) -> np.ndarray:
        """Serialise the table into a 2-D string array.

        The core block has a ``#<::dtype::>`` header cell in its top-left
        corner, column names on the first row, row names in the first column
        and the stringified data elsewhere. Missing names are written as
        '[0]', '[1]', ... . With ``withindex`` the row index is attached on
        the left and the column index on top, padded with '' where the two
        index blocks meet.
        """
        if available(self._rnames):
            rnam = np.asarray(self._rnames)
        else:
            rnam = np.array(smap(range(self.nrow), lambda x: f'[{x}]'))
        if available(self._cnames):
            cnam = np.asarray(self._cnames)
        else:
            cnam = np.array(smap(range(self.ncol), lambda x: f'[{x}]'))

        head = np.hstack([f'#<::{self.dtype.str}::>', cnam])
        body = np.hstack([rnam.reshape((-1,1)), np.asarray(self._dmatx, dtype = str)])
        smtx = np.vstack([head, body])
        if not withindex:
            return smtx

        hasridx = available(self._rindex)
        if hasridx:
            smtx = np.hstack([self._rindex.tosarray().T, smtx])
        if available(self._cindex):
            cidx = self._cindex.tosarray()
            if hasridx:
                # blank corner above the row index columns
                corner = np.tile([''], (self._cindex.size, self._rindex.size))
                cidx = np.hstack([corner, cidx])
            smtx = np.vstack([cidx, smtx])
        return smtx
Esempio n. 7
0
def loadlns(fname: Union[str, Path],
            mode: str = 'r',
            strip: bool = True) -> List[str]:
    """Read a file into a list of lines without their trailing newlines.

    Args:
        fname: file to read; validated with ``checkInputFile`` first.
        mode: mode passed to ``open``.
        strip: when true, lines that are empty after whitespace stripping
            are filtered out through ``drop``.
    """
    checkInputFile(fname)
    with open(fname, mode) as handle:
        raw = handle.readlines()

    lines = [ln.rstrip('\n') for ln in raw]
    if not strip:
        return lines
    return drop(lines, lambda x: x.strip() == '')
Esempio n. 8
0
 def fromsarray(cls, array: np.ndarray) -> StructuredArray:
     """Rebuild a StructuredArray from its 2-D string-array representation.

     Each row describes one array: the first cell is a ``<name::dtype>``
     header and the remaining cells are the values. Values are cast to the
     dtype given in the header; for the ``|b1`` dtype they are instead
     compared against the text ``'True'``.

     Raises:
         ValueError: if a row's first cell is not a valid header.
     """
     _r = re.compile('<(.*)::([<>|]?[biufcmMOSUV]\\d*)>')
     heads, vals = array[:, 0], array[:, 1:]

     # Parse headers one by one rather than through np.vectorize: a cell that
     # does not match cannot yield a (name, dtype) pair, and vectorize cannot
     # be called on zero rows.
     nams, vdts = [], []
     for head in heads:
         found = _r.findall(head)
         if len(found) == 0:
             raise ValueError(f'invalid structured array header: {head!r}')
         name, code = found[0]
         nams.append(name)
         vdts.append(code)

     vals = [
         np.asarray(v).astype(d) if d != '|b1' else v == 'True'
         for v, d in zip(vals, vdts)
     ]
     return StructuredArray(zip(nams, vals))
Esempio n. 9
0
 def asMatrix(val: Iterable[Iterable],
              nrow: Optional[int] = None,
              ncol: Optional[int] = None,
              rownames: Optional[Iterable] = None,
              colnames: Optional[Iterable] = None) -> robj.Matrix:
     """Convert nested values into an R matrix via ``robj.r.matrix``.

     Anything that is not already a 2-D ndarray is first turned into one.
     When neither ``nrow`` nor ``ncol`` is given, both are taken from the
     shape of the values. Row / column names, when given, are attached as
     string vectors.
     """
     is2d = isinstance(val, np.ndarray) and val.ndim == 2
     if not is2d:
         val = np.asarray(smap(val, ll))

     # NOTE(review): if only one of nrow / ncol is supplied, the other one is
     # handed to R as None - confirm that this is intended.
     if missing(nrow) and missing(ncol):
         nrow, ncol = val.shape
     matx = robj.r.matrix(val, nrow=nrow, ncol=ncol)

     def _labels(names):
         # names as an R character vector
         return robj.StrVector(np.asarray(ll(names), dtype=str))

     if available(rownames):
         matx.rownames = _labels(rownames)
     if available(colnames):
         matx.colnames = _labels(colnames)
     return matx
Esempio n. 10
0
    def __init__(self, X: Iterable[Iterable], *, dtype: Optional[Union[str, type, np.ndarray.dtype]] = None,
                 rownames: Optional[Union[Iterable[str], NamedIndex]] = None, rowindex: Optional[Union[Iterable, Mapping, np.ndarray, StructuredArray]] = None,
                 colnames: Optional[Union[Iterable[str], NamedIndex]] = None, colindex: Optional[Union[Iterable, Mapping, np.ndarray, StructuredArray]] = None,
                 metadata: Optional[Union[Sequence, Mapping]] = None, memmap: Optional[Union[str, Path]] = None):
        """Create a table from a 2-dimensional data matrix.

        Args:
            X: the data; copied into a new array using ``dtype``.
            dtype: data type handed to ``np.array``.
            rownames, colnames: optional names, assigned through ``rows_`` /
                ``cols_``.
            rowindex, colindex: optional indices, assigned through ``ridx_`` /
                ``cidx_``.
            metadata: optional metadata; an empty tuple is used when missing.
            memmap: when given, passed to ``self.offload``.

        Raises:
            ValueError: if the data is not 2-dimensional.
        """
        if not isinstance(X, np.ndarray): X = smap(X, ll)
        self._dmatx = np.array(X, dtype = dtype) # make a copy
        if self._dmatx.ndim != 2: raise ValueError('input data is not a 2-dimensional matrix')

        self._memmap = None
        if available(memmap): self.offload(memmap)

        # backing fields are reset first; the rows_ / cols_ assignments below are
        # presumably property setters that validate against the matrix - TODO confirm
        self._rnames = self._cnames = None
        self.rows_ = rownames
        self.cols_ = colnames

        # same pattern for the row / column indices
        self._rindex = self._cindex = None
        self.ridx_ = rowindex
        self.cidx_ = colindex

        self._metas = Metadata(optional(metadata, ()))
Esempio n. 11
0
    def _parseids(self, idx, axis=None, mapslice=True):
        """Split an index expression into a names part and an array part.

        Without ``axis``, ``idx`` is either a single selector for the names
        part or a tuple of one or two selectors. With ``axis``, ``idx`` must
        not be a tuple and is applied to the names part (axis 0) or the array
        part (axis 1). ``None`` becomes a full slice and a non-listable
        selector is wrapped into a one-item list. The names selector is
        resolved through ``self.names`` when it is a slice (and ``mapslice``
        is set) or when it is a listable containing non-string items.

        Raises:
            IndexError: if ``idx`` is a tuple while ``axis`` is given, or if
                ``axis`` is neither 0 nor 1.
        """
        if missing(axis):
            if not isinstance(idx, tuple):
                sids, aids = idx, slice(None)
            elif len(idx) == 1:
                sids, aids = idx[0], slice(None)
            else:
                sids, aids = idx
        else:
            if isinstance(idx, tuple):
                raise IndexError('too many dimensions for array')
            if axis not in (0, 1):
                raise IndexError('invalid axis value')
            if axis == 0:
                sids, aids = idx, slice(None)
            else:
                sids, aids = slice(None), idx

        def _norm(ids):
            # None -> full slice; slices and listables pass through; scalars get wrapped
            if ids is None:
                return slice(None)
            if isinstance(ids, slice) or listable(ids):
                return ids
            return [ids]

        sids, aids = _norm(sids), _norm(aids)

        byslice = isinstance(sids, slice) and mapslice
        if byslice or (listable(sids) and
                       checkany(sids, lambda x: not isstring(x))):
            sids = self.names[sids]
        return sids, aids
Esempio n. 12
0
    def _parsevals(self, value):
        """Normalise a value that is about to be written into the table.

        - a Table contributes its data matrix, cast to this table's dtype;
        - an ndarray is cast to this table's dtype;
        - a non-iterable value is returned unchanged;
        - a flat iterable (including an empty one) becomes a 1-D array;
        - an iterable of iterables becomes an array built from its rows.
        """
        if isinstance(value, Table): return value._dmatx.astype(self.dtype)
        if isinstance(value, np.ndarray): return value.astype(self.dtype)
        if not iterable(value): return value

        value = ll(value)
        # an empty input has no first element to probe, so treat it as flat
        if len(value) == 0 or not iterable(value[0]):
            return np.asarray(value, dtype = self.dtype)

        # ndarrays were already returned above, so only the row-wise
        # conversion can apply at this point
        return np.asarray(smap(value, ll), dtype = self.dtype)
Esempio n. 13
0
 def _sfmt(lns, pos):
     """Right-align the cells in slice ``pos`` of every line, in place.

     The common width is the longest cell plus one; the ' ... ' and
     ' ... ... ' placeholders do not count towards it, and ' ... ' cells
     are left unpadded. Returns the same list of lines.
     """
     fillers = (' ... ', ' ... ... ')
     widths = collapse([[0 if v in fillers else len(v) for v in ln[pos]]
                        for ln in lns])
     size = max(widths) + 1
     for ln in lns:
         ln[pos] = [v if v == ' ... ' else '{0:>{1}s}'.format(v, size)
                    for v in ln[pos]]
     return lns
Esempio n. 14
0
 def createTable(
     self, tableName: str,
     columns: Iterable[Iterable[str]] = ()) -> SQLiteWrapper:
     """Create a table and return ``self`` for chaining.

     Args:
         tableName: name of the new table.
         columns: one iterable per column; its items are joined with
             spaces to form the column definition.
     """
     tcols = paste(smap(columns, lambda x: paste(x, sep=' ')), sep=', ')
     # Identifiers cannot be bound as SQL parameters, so double any embedded
     # single quote to keep the quoted name intact.
     tname = str(tableName).replace("'", "''")
     self.execute(f"CREATE TABLE '{tname}'({tcols})")
     return self