def fromsarray(cls, array: np.ndarray,
               dtype: Optional[Union[str, type, np.ndarray.dtype]] = None,
               headerpos: Optional[Union[Sequence[int], np.ndarray]] = None) -> Table:
    """Rebuild a Table from a 2-D string array that carries a ``#<::dtype::>`` header cell.

    The header cell marks the corner of the data block: the cells to its right
    are column names, the cells below it are row names and the rest is the data
    matrix. Columns to the left of the header are parsed as the row index and
    rows above it as the column index (both via ``StructuredArray.fromsarray``).

    Args:
        array: the string array to parse.
        dtype: data type of the matrix; when missing it is read from the header cell.
        headerpos: ``(row, col)`` position of the header cell; when missing it is
            searched for in the top-left 100 x 100 corner of ``array``.

    Returns:
        The Table built from the parsed parts.

    Raises:
        ValueError: if the search finds no header or more than one header, or
            if no dtype is given and the header cell holds none.
    """
    header_re = re.compile('#<::([<>|]?[biufcmMOSUV]\\d*)::>')

    def _header_dtype(cell):
        # dtype code embedded in a header cell, '' when the cell is not a header
        hits = header_re.findall(cell)
        return hits[0] if len(hits) > 0 else ''

    def _is_default(names):
        # names written as '[0]', '[1]', ... are positional placeholders
        return np.all(names == smap(range(names.shape[0]), lambda x: f'[{x}]'))

    if missing(headerpos):
        marks = np.vectorize(_header_dtype)(array[:100,:100])
        found = np.c_[np.where(marks != '')]  # one (row, col) pair per header hit
        nfound = found.shape[0]
        if nfound >= 2:
            raise ValueError('string array has multiple headers')
        if nfound == 0:
            raise ValueError('string array has no header in the first 100 rows / cols')
        headerpos = found[0]
    hrow, hcol = headerpos

    if missing(dtype):
        dtype = _header_dtype(array[hrow,hcol])
        if dtype == '':
            raise ValueError('unknown array data type')

    # index blocks exist only when the header is not in the first column / row
    rowindex = None
    if hcol > 0:
        rowindex = StructuredArray.fromsarray(array[hrow:,:hcol].T)
    colindex = None
    if hrow > 0:
        colindex = StructuredArray.fromsarray(array[:hrow,hcol:])

    rownames = array[hrow+1:,hcol]
    if _is_default(rownames):
        rownames = None
    colnames = array[hrow,hcol+1:]
    if _is_default(colnames):
        colnames = None

    values = array[hrow+1:,hcol+1:]
    return Table(values, dtype = dtype, rownames = rownames, colnames = colnames,
                 rowindex = rowindex, colindex = colindex)
def _fmt(mtx, rnam, cnam, ridx, cidx):
    """Render a matrix with its names and indices as a list of aligned text lines.

    ``strinkcols``, ``strinkrows`` and ``delimiter`` are read from the
    enclosing scope (not defined in this block). When the matrix has more
    columns / rows than those limits, only the first 3 and last 1 columns and
    the first 5 and last 3 rows are printed, with placeholders in between.

    Args:
        mtx: 2-D matrix of values (its ``shape`` gives the row / column counts).
        rnam: row names, or missing to use ``[0]``, ``[1]``, ...
        cnam: column names, or missing to use ``[0]``, ``[1]``, ...
        ridx: row index (``size``, ``names`` and lookup by name are used), or missing.
        cidx: column index (same usage as ``ridx``), or missing.

    Returns:
        One string per output line, cells joined with ``delimiter``.
    """
    nrows, ncols = mtx.shape
    if missing(rnam):
        rnam = smap(range(nrows), lambda x: f'[{x}]')
    if missing(cnam):
        cnam = smap(range(ncols), lambda x: f'[{x}]')

    def _abbrev(vals, shrink, head, tail, filler):
        # stringify vals; when shrinking keep vals[:head] + filler + vals[tail:]
        if not shrink:
            return smap(vals, str)
        return smap(vals[:head], str) + [filler] + smap(vals[tail:], str)

    def _cols(vals):
        return _abbrev(vals, ncols > strinkcols, 3, -1, ' ... ')

    def _rows(vals):
        return _abbrev(vals, nrows > strinkrows, 5, -3, '')

    # column-name line followed by the (possibly abbreviated) data lines
    header = _cols(cnam)
    if nrows <= strinkrows:
        body = [_cols(ln) for ln in mtx]
    else:
        top = [_cols(ln) for ln in mtx[:5]]
        gap = _cols([' ... ... '] + [''] * (ncols-1))
        bottom = [_cols(ln) for ln in mtx[-3:]]
        body = top + [gap] + bottom

    # prepend the row-name column ('#' sits above it in the header line)
    lines = [['#'] + header] + [[nm] + ln for nm, ln in zip(_rows(rnam), body)]

    nri = ridx.size if available(ridx) else 0
    nci = cidx.size if available(cidx) else 0
    if nci > 0:
        # one extra line on top per column index
        lines = [[f'<{k}>'] + _cols(cidx[k]) for k in cidx.names] + lines
    if nri > 0:
        # one extra column on the left per row index, blank above the header line
        idxcols = [[''] * nci + [f'<{k}>'] + _rows(ridx[k]) for k in ridx.names]
        lines = [list(cells) + ln for cells, ln in zip(zip(*idxcols), lines)]

    def _pad(lns, span):
        # right-align the cells in `span` of every line to a common width (in place)
        def _width(cell):
            return len(cell) if cell not in (' ... ', ' ... ... ') else 0
        width = max(collapse(smap(lns, lambda ln: smap(ln[span], _width)))) + 1
        for ln in lns:
            ln[span] = smap(ln[span], lambda cell: '{0:>{1}s}'.format(cell, width) if cell != ' ... ' else cell)
        return lns

    # index columns, the name column and the data columns are aligned separately
    if nri > 0:
        lines = _pad(lines, slice(None,nri))
    lines = _pad(lines, slice(nri,nri+1))
    lines = _pad(lines, slice(nri+1,None))
    return smap(lines, lambda ln: paste(ln, sep = delimiter))
def _exec(self, pms):
    """Run the wrapped binary once and return its exit code and captured output.

    Args:
        pms: a ``(params, stdin, timeout)`` triple, packed into one argument
            (for multiproc). ``params`` may be missing; each parameter is
            stringified and stripped. ``stdin`` and ``timeout`` are passed
            to ``Popen.communicate``.

    Returns:
        ``(rcode, rstrs)`` where ``rstrs`` holds the decoded, stripped stdout
        and stderr. On timeout ``rcode`` is 124 and ``rstrs`` holds a fixed
        message.

    Raises:
        RuntimeError: if the exit code is not in ``self._ncode``.
    """
    params, stdin, timeout = pms # for multiproc
    exlst = [self._bin] + ([] if missing(params) else smap(params, lambda x: str(x).strip()))
    if self._shell:
        # NOTE(review): only blanks are escaped here; other shell metacharacters
        # in the parameters reach the shell unchanged -- confirm callers never
        # pass untrusted values when shell mode is on.
        exlst = paste(smap(exlst, lambda x: x.replace(' ', r'\ ')), sep=' ')

    # The context manager closes the pipes and waits for the child on exit, so
    # a process killed after a timeout is reaped instead of being left as a
    # zombie with open descriptors.
    with Popen(exlst, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=self._shell) as procs:
        try:
            rvals = procs.communicate(input=stdin, timeout=timeout)
            rstrs = smap(rvals, lambda x: '' if x is None else x.decode('utf-8').strip())
            rcode = procs.returncode
        except TimeoutExpired:
            procs.kill()
            rstrs = ['subprocess terminated as timeout expired', '']
            rcode = 124

    prstr = paste(rstrs, sep=' | ')
    if rcode in self._ncode:
        logging.log((logging.DEBUG if self._mute else logging.INFO), prstr)
    else:
        raise RuntimeError(f'execution failed [{rcode}]:\n{prstr}')
    return rcode, rstrs
def __str__(self):
    """Return a multi-line text view with one ``name : values`` entry per array.

    Names are left-aligned to the longest name. When the string form of an
    array spans several lines, the continuation lines are indented by the
    same width. An empty container prints as ``[ ]``.
    """
    if len(self._arrs) == 0:
        return '[ ]'

    width = max(smap(self._arrs.keys(), len))
    indent = ' ' * (width + 3)  # same width as the 'name : ' prefix

    out = []
    for key, text in zip(self._arrs.keys(), smap(self._arrs.values(), str)):
        for lineno, ln in enumerate(text.split('\n')):
            prefix = f'{key:{width}s} : ' if lineno == 0 else indent
            out.append(prefix + ln)
    return paste(out, sep='\n')
def _parsevals(self, value):
    """Normalise a value before it is stored in this structured array.

    Args:
        value: another StructuredArray, a non-iterable value, a flat
            iterable, or an iterable of iterables.

    Returns:
        - for a StructuredArray: copies of its arrays, in the order of this
          object's own names;
        - for a non-iterable value: the value itself;
        - for an empty or flat iterable: a single numpy array;
        - otherwise: a list with one numpy array per inner iterable.

    Raises:
        ValueError: if the inner iterables do not all have the same length.
    """
    if isinstance(value, StructuredArray):
        return [value._arrs[n].copy() for n in self._arrs.keys()]
    if not iterable(value):
        return value

    value = ll(value)
    # an empty iterable has no first element to probe; treat it as a flat
    # (empty) array instead of failing with an IndexError
    if len(value) == 0 or not iterable(value[0]):
        return np.asarray(value)

    value = smap(value, lambda x: np.asarray(ll(x)))
    if len(set(smap(value, len))) != 1:
        raise ValueError('input arrays not in the same size')
    return value
def tosarray(self, withindex: bool = True) -> np.ndarray:
    """Serialise the table into a 2-D string array.

    The first row holds a ``#<::dtype::>`` header cell followed by the
    column names, and the first column holds the row names. Missing names
    are written as ``[0]``, ``[1]``, ... With ``withindex`` the row index is
    added as extra columns on the left and the column index as extra rows
    on top; the corner between them is filled with empty strings.

    Args:
        withindex: whether to include the row / column indices.

    Returns:
        The assembled string array.
    """
    if available(self._rnames):
        rnam = np.asarray(self._rnames)
    else:
        rnam = np.array(smap(range(self.nrow), lambda x: f'[{x}]'))
    if available(self._cnames):
        cnam = np.asarray(self._cnames)
    else:
        cnam = np.array(smap(range(self.ncol), lambda x: f'[{x}]'))

    head = np.hstack([f'#<::{self.dtype.str}::>', cnam])
    body = np.hstack([rnam.reshape((-1,1)), np.asarray(self._dmatx, dtype = str)])
    smtx = np.vstack([head, body])
    if not withindex:
        return smtx

    has_ridx = available(self._rindex)
    if has_ridx:
        left = self._rindex.tosarray()
        smtx = np.hstack([left.T, smtx])
    if available(self._cindex):
        top = self._cindex.tosarray()
        if has_ridx:
            # blank corner so the index rows line up with the widened matrix
            corner = np.tile([''], (self._cindex.size, self._rindex.size))
            top = np.hstack([corner, top])
        smtx = np.vstack([top, smtx])
    return smtx
def loadlns(fname: Union[str, Path], mode: str = 'r', strip: bool = True) -> List[str]:
    """Read a file and return its lines without trailing newlines.

    Args:
        fname: path of the file; it is passed to ``checkInputFile`` first.
        mode: mode string handed to ``open``.
        strip: when true, lines that are empty or whitespace-only are dropped.

    Returns:
        The lines of the file.
    """
    checkInputFile(fname)
    with open(fname, mode) as fh:
        raw = fh.readlines()

    lines = smap(raw, lambda ln: ln.rstrip('\n'))
    if not strip:
        return lines
    return drop(lines, lambda ln: ln.strip() == '')
def fromsarray(cls, array: np.ndarray) -> StructuredArray:
    """Rebuild a StructuredArray from its 2-D string-array form.

    Each row of ``array`` describes one named array: the first cell is a
    ``<name::dtype>`` label and the remaining cells are the values as
    strings. Values are cast to the labelled dtype, except for ``|b1``,
    where a cell is true exactly when it equals ``'True'``.

    Args:
        array: the string array to parse.

    Returns:
        The StructuredArray built from the ``(name, values)`` pairs.

    Raises:
        ValueError: if a row label does not match ``<name::dtype>``.
    """
    _r = re.compile('<(.*)::([<>|]?[biufcmMOSUV]\\d*)>')
    nams, vals = array[:, 0], array[:, 1:]

    def _label(cell):
        # Always return a (name, dtype) pair: np.vectorize fixes the number of
        # outputs from the first call, so a non-tuple fallback for a bad label
        # would surface later as an obscure numpy / unpacking error.
        hit = _r.search(cell)
        if hit is None:
            raise ValueError(f'invalid structured array label: {cell!r}')
        return hit.groups()

    nams, vdts = np.vectorize(_label)(nams)
    vals = smap(
        zip(vals, vdts),
        unpack(lambda v, d: np.asarray(v).astype(d) if d != '|b1' else v == 'True'))
    return StructuredArray(zip(nams, vals))
def asMatrix(val: Iterable[Iterable], nrow: Optional[int] = None, ncol: Optional[int] = None,
             rownames: Optional[Iterable] = None, colnames: Optional[Iterable] = None) -> robj.Matrix:
    """Convert a 2-D collection into an R matrix through ``robj.r.matrix``.

    Args:
        val: a 2-D numpy array, or an iterable of iterables that is turned into one.
        nrow: number of rows; when both ``nrow`` and ``ncol`` are missing the
            shape of ``val`` is used.
        ncol: number of columns; see ``nrow``.
        rownames: optional row names, stored as an R string vector.
        colnames: optional column names, stored as an R string vector.

    Returns:
        The R matrix object.
    """
    if not (isinstance(val, np.ndarray) and val.ndim == 2):
        val = np.asarray(smap(val, ll))
    if missing(nrow) and missing(ncol):
        nrow, ncol = val.shape

    # Forward only the extents that are known: a missing one would otherwise be
    # handed to R as an explicit NULL, which matrix() rejects, whereas an
    # omitted argument lets R infer it from the data length.
    dims = {}
    if available(nrow):
        dims['nrow'] = nrow
    if available(ncol):
        dims['ncol'] = ncol
    matx = robj.r.matrix(val, **dims)

    if available(rownames):
        matx.rownames = robj.StrVector(np.asarray(ll(rownames), dtype=str))
    if available(colnames):
        matx.colnames = robj.StrVector(np.asarray(ll(colnames), dtype=str))
    return matx
def __init__(self, X: Iterable[Iterable], *, dtype: Optional[Union[str, type, np.ndarray.dtype]] = None,
             rownames: Optional[Union[Iterable[str], NamedIndex]] = None,
             rowindex: Optional[Union[Iterable, Mapping, np.ndarray, StructuredArray]] = None,
             colnames: Optional[Union[Iterable[str], NamedIndex]] = None,
             colindex: Optional[Union[Iterable, Mapping, np.ndarray, StructuredArray]] = None,
             metadata: Optional[Union[Sequence, Mapping]] = None,
             memmap: Optional[Union[str, Path]] = None):
    """Create a table from a 2-dimensional collection of values.

    Args:
        X: the data; anything that is not a numpy array is first converted
            row by row into a list of lists.
        dtype: data type handed to ``np.array`` for the stored matrix.
        rownames: row names, assigned through ``self.rows_``.
        rowindex: row index, assigned through ``self.ridx_``.
        colnames: column names, assigned through ``self.cols_``.
        colindex: column index, assigned through ``self.cidx_``.
        metadata: initial metadata; an empty tuple is used when missing.
        memmap: when given, ``self.offload(memmap)`` is called right after
            the matrix is stored.

    Raises:
        ValueError: if the resulting matrix is not 2-dimensional.
    """
    if not isinstance(X, np.ndarray): X = smap(X, ll)
    self._dmatx = np.array(X, dtype = dtype) # make a copy
    if self._dmatx.ndim != 2: raise ValueError('input data is not a 2-dimensional matrix')

    self._memmap = None
    # offload() is defined elsewhere; presumably it moves the matrix to a
    # memory-mapped file at this path -- TODO confirm
    if available(memmap): self.offload(memmap)

    # The backing fields are reset to None before the public attributes are
    # assigned. rows_ / cols_ / ridx_ / cidx_ are defined elsewhere --
    # presumably property setters that validate against the matrix shape;
    # verify before reordering any of these statements.
    self._rnames = self._cnames = None
    self.rows_ = rownames
    self.cols_ = colnames

    self._rindex = self._cindex = None
    self.ridx_ = rowindex
    self.cidx_ = colindex

    self._metas = Metadata(optional(metadata, ()))
def _parseids(self, idx, axis=None, mapslice=True):
    """Split an index expression into a name selector and an item selector.

    Args:
        idx: the raw index. Without ``axis`` it may be a single selector, a
            1-tuple ``(names,)`` or a 2-tuple ``(names, items)``.
        axis: when given, ``idx`` addresses only that axis: 0 for names,
            1 for items.
        mapslice: whether a slice over names is resolved through ``self.names``.

    Returns:
        ``(sids, aids)``: the name selector and the item selector. ``None``
        becomes a full slice and a single non-listable value is wrapped in a
        list. A name selector that is a slice (when ``mapslice`` is set) or
        that contains non-string entries is looked up in ``self.names``.

    Raises:
        IndexError: if ``axis`` is given and ``idx`` is a tuple, or ``axis``
            is neither 0 nor 1.
    """
    if missing(axis):
        if not isinstance(idx, tuple):
            sids, aids = idx, slice(None)
        elif len(idx) == 1:
            sids, aids = idx[0], slice(None)
        else:
            sids, aids = idx
    else:
        if isinstance(idx, tuple):
            raise IndexError('too many dimensions for array')
        if axis not in (0, 1):
            raise IndexError('invalid axis value')
        if axis == 0:
            sids, aids = idx, slice(None)
        else:
            sids, aids = slice(None), idx

    def _normalize(ids):
        # None -> everything; slices pass through; single values become 1-item lists
        if ids is None:
            return slice(None)
        if isinstance(ids, slice) or listable(ids):
            return ids
        return [ids]

    sids, aids = _normalize(sids), _normalize(aids)

    by_slice = isinstance(sids, slice) and mapslice
    if by_slice or (listable(sids) and checkany(sids, lambda x: not isstring(x))):
        sids = self.names[sids]
    return sids, aids
def _parsevals(self, value):
    """Normalise a value before it is assigned into this table.

    Args:
        value: a Table, a numpy array, a non-iterable value, a flat iterable
            or an iterable of iterables.

    Returns:
        - for a Table: its data matrix cast to this table's dtype;
        - for a numpy array: the array cast to this table's dtype;
        - for a non-iterable value: the value itself;
        - for any other iterable: a numpy array of this table's dtype,
          built row by row when the elements are themselves iterable.
    """
    if isinstance(value, Table):
        return value._dmatx.astype(self.dtype)
    if isinstance(value, np.ndarray):
        return value.astype(self.dtype)
    if not iterable(value):
        return value

    value = ll(value)
    # an empty iterable has no first element to probe; treat it as a flat
    # (empty) array instead of failing with an IndexError
    if len(value) == 0 or not iterable(value[0]):
        return np.asarray(value, dtype = self.dtype)

    # ndarrays were already returned above, so the rows are always converted here
    return np.asarray(smap(value, ll), dtype = self.dtype)
def _sfmt(lns, pos):
    """Right-align the cells selected by ``pos`` in every line to a common width.

    The width is one more than the longest selected cell; the placeholders
    ``' ... '`` and ``' ... ... '`` do not count towards it. ``' ... '``
    cells are left as they are; every other cell is padded on the left.

    Args:
        lns: list of lines, each a list of string cells; modified in place.
        pos: slice selecting the cells to align in each line.

    Returns:
        ``lns`` itself.
    """
    placeholders = (' ... ', ' ... ... ')

    def _width(cell):
        if cell in placeholders:
            return 0
        return len(cell)

    size = max(collapse(smap(lns, lambda ln: smap(ln[pos], _width)))) + 1

    def _align(cell):
        if cell == ' ... ':
            return cell
        return '{0:>{1}s}'.format(cell, size)

    for ln in lns:
        ln[pos] = smap(ln[pos], _align)
    return lns
def createTable(self, tableName: str, columns: Iterable[Iterable[str]] = ()) -> SQLiteWrapper:
    """Issue a ``CREATE TABLE`` statement for ``tableName``.

    Args:
        tableName: name of the table; it is placed in single quotes in the statement.
        columns: one iterable of tokens per column; the tokens of a column
            are joined with blanks and the columns with ``', '``.

    Returns:
        ``self``, so calls can be chained.
    """
    # NOTE(review): the name and column tokens are interpolated verbatim into
    # the SQL text -- confirm callers only pass trusted values.
    coldefs = smap(columns, lambda col: paste(col, sep=' '))
    schema = paste(coldefs, sep=', ')
    self.execute(f"CREATE TABLE '{tableName}'({schema})")
    return self