def __str__(self):
    """Render the view as text: a two-line header followed by a data table.

    The header reports the number of distinct observations and variables,
    adding "(N rows)" / "(N columns)" when the view repeats some of them.
    The table has one line per row in the view, labelled ``r<obs>``, under
    a line of ``c<var>`` column labels. The ``{txt}`` and ``{res}`` tags
    are emitted literally (presumably Stata SMCL display directives).

    Returns
    -------
    str
        Only the header, if the view has no rows or no columns.
    """
    colnums, rownums = self._colnums, self._rownums
    nrows, nobs = self._nrows, self._nobs
    ncols, nvar = self._ncols, self._nvar
    fmts = self._formats

    nobs_str = str(nobs)
    nrow_str = "" if nrows == nobs else (" (" + str(nrows) + " rows)")
    nvar_str = str(nvar)
    ncol_str = "" if ncols == nvar else (" (" + str(ncols) + " columns)")

    # First pass fills in the shared count width {m}; the doubled braces
    # survive it and become the placeholders filled by the second pass.
    count_width = max(len(nobs_str), len(nvar_str))
    header = (
        " {{txt}}"
        + "obs: {{:>{m}}}{{}}\n vars: {{:>{m}}}{{}}".format(m=count_width)
    )
    header = header.format(nobs_str, nrow_str, nvar_str, ncol_str)

    if nrows == 0 or ncols == 0:
        return "\n" + header + "\n\n"

    # Format the data column by column, since the format depends on the
    # variable, not on the observation.
    columns = []
    for i, c in enumerate(colnums):
        m = STR_FMT_RE.match(fmts[i])
        if st_isstrvar(c):
            # String variable: honour width/alignment of a string format,
            # truncating values that are too long; default is "%11s".
            width = int(m.group(3)) if m else 11
            align = "<" if m and m.group(1) == "-" else ">"
            fmt = "{:" + align + str(width) + "}"
            columns.append(
                [fmt.format(_st_sdata(r, c)[:width]) for r in rownums]
            )
        else:
            # Numeric variable: a string format cannot apply, so fall
            # back to the default numeric format.
            fmt = "%9.0g" if m else fmts[i]
            columns.append([st_format(fmt, _st_data(r, c)) for r in rownums])

    # Transpose from column-major to row-major.
    rows = [list(cells) for cells in zip(*columns)]

    # Width of the row-label column: "r" plus the digits of the largest
    # observation number. Counting characters is exact, whereas
    # floor(log(n, 10)) + 1 is off by one at powers of ten such as 1000.
    ndigits = len(str(max(rownums)))
    row_fmt = "{{txt}}{:>" + str(ndigits + 1) + "}"

    # Column labels are right-aligned to the width of the formatted cells.
    label_row = [row_fmt.format("")] + [
        "{:>{w}}".format("c" + str(v), w=len(cell))
        for v, cell in zip(colnums, rows[0])
    ]
    body = [
        [row_fmt.format("r" + str(r)) + "{res}"] + row
        for row, r in zip(rows, rownums)
    ]

    return (
        "\n" + header + "\n\n"
        + "\n".join(" ".join(line) for line in [label_row] + body)
    )
def __init__(self, rowNums=None, colNums=None, selectVar=None):
    """Set up a view on the given observations and variables.

    Parameters
    ----------
    rowNums : sequence of int, optional
        Observation numbers; if omitted (or empty), all observations
        reported by `st_nobs` are used.
    colNums : sequence of int, optional
        Variable indices; if omitted (or empty), all variables reported
        by `st_nvar` are used.
    selectVar : str, int, or None, optional
        None or "" applies no row selection. A str is converted to a
        variable index with `st_varindex`. The index -1 keeps only rows
        with no missing value in any of `colNums`; any other index keeps
        rows where that variable is non-zero.

    Raises
    ------
    TypeError
        If `selectVar` is not a str, int, or None.
    """
    # NOTE(review): an explicitly empty rowNums/colNums is treated the
    # same as None (i.e. "everything") -- confirm this is intended.
    if not rowNums:
        rowNums = tuple(range(st_nobs()))

    if not colNums:
        self._nVar = self._nCols = st_nvar()
        colNums = tuple(range(self._nVar))
    else:
        # using set because there could be duplicates
        self._nVar = len(set(colNums))
        self._nCols = len(colNums)

    if not (selectVar is None or selectVar == ""):
        if isinstance(selectVar, str):
            selectVar = st_varindex(selectVar, True)
        elif not isinstance(selectVar, int):
            raise TypeError("selectVar should be str, int, or None")

        if selectVar == -1:
            # NOTE(review): _st_data is applied to every column here,
            # including string variables -- confirm that is safe.
            rowNums = tuple(
                r for r in rowNums
                if not any(st_ismissing(_st_data(r, c)) for c in colNums)
            )
        else:
            rowNums = tuple(
                r for r in rowNums if _st_data(r, selectVar) != 0
            )

    # Count rows only after selectVar has been applied; counting earlier
    # leaves _nRows/_nObs larger than the stored row tuple whenever the
    # selection drops rows.
    self._nRows = len(rowNums)
    self._nObs = len(set(rowNums))  # set: there could be duplicates

    self._rowNums = rowNums
    self._colNums = colNums

    # Default display format and accessor functions, chosen per column
    # by whether the variable is a string variable.
    isStr = [st_isstrvar(c) for c in colNums]
    self._formats = ["%11s" if s else "%9.0g" for s in isStr]
    self._getters = [_st_sdata if s else _st_data for s in isStr]
    self._setters = [_st_sstore if s else _st_store for s in isStr]
def st_data(obs, cols):
    """Return numeric data for the given observations and columns.

    obs should be an int or iterable of int; cols should be a single
    str or int, or an iterable of str or int. The result is a list of
    lists, one sub-list per observation. A TypeError is raised if any
    requested column is not a numeric Stata variable.
    """
    rowIdx, colIdx, _ignored = _parseObsColsVals(obs, cols)

    if any(not st_isnumvar(c) for c in colIdx):
        raise TypeError("only numeric Stata variables allowed")

    table = []
    for r in rowIdx:
        table.append([_st_data(r, c) for c in colIdx])
    return table
def __repr__(self):
    """Render the view as text: a two-line header followed by a data table.

    The header reports the number of distinct observations and variables,
    adding "(N rows)" / "(N columns)" when the view repeats some of them.
    The table has one line per row in the view, labelled ``r<obs>``, under
    a line of ``c<var>`` column labels. The ``{txt}`` and ``{res}`` tags
    are emitted literally (presumably Stata SMCL display directives).

    Returns
    -------
    str
        Only the header, if the view has no rows or no columns.
    """
    colNums, rowNums = self._colNums, self._rowNums
    nRows, nObs = self._nRows, self._nObs
    nCols, nVar = self._nCols, self._nVar
    fmts = self._formats

    nObsStr = str(nObs)
    nRowStr = "" if nRows == nObs else (" (" + str(nRows) + " rows)")
    nVarStr = str(nVar)
    nColStr = "" if nCols == nVar else (" (" + str(nCols) + " columns)")

    # First pass fills in the shared count width {m}; the doubled braces
    # survive it and become the placeholders filled by the second pass.
    countWidth = max(len(nObsStr), len(nVarStr))
    header = (
        " {{txt}}"
        + "obs: {{:>{m}}}{{}}\n vars: {{:>{m}}}{{}}".format(m=countWidth)
    )
    header = header.format(nObsStr, nRowStr, nVarStr, nColStr)

    if nRows == 0 or nCols == 0:
        return "\n" + header + "\n\n"

    # Format the data column by column, since the format depends on the
    # variable, not on the observation.
    columns = []
    for i, c in enumerate(colNums):
        m = STR_FMT_RE.match(fmts[i])
        if st_isstrvar(c):
            # String variable: honour width/alignment of a string format,
            # truncating values that are too long; default is "%11s".
            width = int(m.group(3)) if m else 11
            align = "<" if m and m.group(1) == "-" else ">"
            fmt = "{:" + align + str(width) + "}"
            columns.append(
                [fmt.format(_st_sdata(r, c)[:width]) for r in rowNums]
            )
        else:
            # Numeric variable: a string format cannot apply, so fall
            # back to the default numeric format.
            fmt = "%9.0g" if m else fmts[i]
            columns.append([st_format(fmt, _st_data(r, c)) for r in rowNums])

    # Transpose from column-major to row-major.
    strList = [list(cells) for cells in zip(*columns)]

    # Width of the row-label column: "r" plus the digits of the largest
    # observation number. Counting characters is exact, whereas
    # floor(log(n, 10)) + 1 is off by one at powers of ten such as 1000.
    nDigits = len(str(max(rowNums)))
    rowFmt = "{{txt}}{:>" + str(nDigits + 1) + "}"

    # Column labels are right-aligned to the width of the formatted cells.
    labelRow = [rowFmt.format("")] + [
        "{:>{w}}".format("c" + str(v), w=len(cell))
        for v, cell in zip(colNums, strList[0])
    ]
    body = [
        [rowFmt.format("r" + str(r)) + "{res}"] + row
        for row, r in zip(strList, rowNums)
    ]

    return (
        "\n" + header + "\n\n"
        + "\n".join(" ".join(line) for line in [labelRow] + body)
    )
def __str__(self):
    """Render the view as text: a two-line header followed by a data table.

    The header reports the number of distinct observations and variables,
    adding "(N rows)" / "(N columns)" when the view repeats some of them.
    The table has one line per row in the view, labelled ``r<obs>``, under
    a line of ``c<var>`` column labels. The ``{txt}`` and ``{res}`` tags
    are emitted literally (presumably Stata SMCL display directives).

    Returns
    -------
    str
        Only the header, if the view has no rows or no columns.
    """
    colnums, rownums = self._colnums, self._rownums
    nrows, nobs = self._nrows, self._nobs
    ncols, nvar = self._ncols, self._nvar
    fmts = self._formats

    nobs_str = str(nobs)
    nrow_str = "" if nrows == nobs else (" (" + str(nrows) + " rows)")
    nvar_str = str(nvar)
    ncol_str = "" if ncols == nvar else (" (" + str(ncols) + " columns)")

    # First pass fills in the shared count width {m}; the doubled braces
    # survive it and become the placeholders filled by the second pass.
    count_width = max(len(nobs_str), len(nvar_str))
    header = (
        " {{txt}}"
        + "obs: {{:>{m}}}{{}}\n vars: {{:>{m}}}{{}}".format(m=count_width)
    )
    header = header.format(nobs_str, nrow_str, nvar_str, ncol_str)

    if nrows == 0 or ncols == 0:
        return "\n" + header + "\n\n"

    # Format the data column by column, since the format depends on the
    # variable, not on the observation.
    columns = []
    for i, c in enumerate(colnums):
        m = STR_FMT_RE.match(fmts[i])
        if st_isstrvar(c):
            # String variable: honour width/alignment of a string format,
            # truncating values that are too long; default is "%11s".
            width = int(m.group(3)) if m else 11
            align = "<" if m and m.group(1) == "-" else ">"
            fmt = "{:" + align + str(width) + "}"
            columns.append(
                [fmt.format(_st_sdata(r, c)[:width]) for r in rownums]
            )
        else:
            # Numeric variable: a string format cannot apply, so fall
            # back to the default numeric format.
            fmt = "%9.0g" if m else fmts[i]
            columns.append([st_format(fmt, _st_data(r, c)) for r in rownums])

    # Transpose from column-major to row-major.
    rows = [list(cells) for cells in zip(*columns)]

    # Width of the row-label column: "r" plus the digits of the largest
    # observation number. Counting characters is exact, whereas
    # floor(log(n, 10)) + 1 is off by one at powers of ten such as 1000.
    ndigits = len(str(max(rownums)))
    row_fmt = "{{txt}}{:>" + str(ndigits + 1) + "}"

    # Column labels are right-aligned to the width of the formatted cells.
    label_row = [row_fmt.format("")] + [
        "{:>{w}}".format("c" + str(v), w=len(cell))
        for v, cell in zip(colnums, rows[0])
    ]
    body = [
        [row_fmt.format("r" + str(r)) + "{res}"] + row
        for row, r in zip(rows, rownums)
    ]

    return (
        "\n" + header + "\n\n"
        + "\n".join(" ".join(line) for line in [label_row] + body)
    )
def get(self, rownum, colnum):
    """Fetch one data value.

    Parameters
    ----------
    rownum : int
        Stata observation number
    colnum : int
        Stata variable index

    Returns
    -------
    string, float, or MissingValue instance, depending on the data type
    of the Stata variable
    """
    # String variables are read with the string accessor, everything
    # else with the numeric one.
    reader = _st_sdata if st_isstrvar(colnum) else _st_data
    return reader(rownum, colnum)
def st_data(obsnums, vars):
    """Fetch numeric values for the requested observations and variables.

    Parameters
    ----------
    obsnums : int or iterable of int
    vars : int, str, or iterable of int or str
        integers denote column numbers
        strings should be Stata variable names or
          unambiguous abbreviations

    Returns
    -------
    List of lists of float or MissingValue, one sub-list per observation

    Raises
    ------
    TypeError
        If any requested variable is not numeric.
    """
    row_idx, col_idx, _ignored = _parse_obs_cols_vals(obsnums, vars)

    if any(not st_isnumvar(c) for c in col_idx):
        raise TypeError("only numeric Stata variables allowed")

    table = []
    for r in row_idx:
        table.append([_st_data(r, c) for c in col_idx])
    return table
def st_data(obsnums, vars):
    """Fetch numeric values for the requested observations and variables.

    Parameters
    ----------
    obsnums : int or iterable of int
    vars : int, str, or iterable of int or str
        integers denote column numbers
        strings should be Stata variable names or
          unambiguous abbreviations

    Returns
    -------
    List of lists of float or MissingValue, one sub-list per observation

    Raises
    ------
    TypeError
        If any requested variable is not numeric.
    """
    parsed_obs, parsed_vars, _rest = _parse_obs_cols_vals(obsnums, vars)

    for v in parsed_vars:
        if not st_isnumvar(v):
            raise TypeError("only numeric Stata variables allowed")

    def read_row(obs):
        # One output row: the value of each requested variable.
        return [_st_data(obs, v) for v in parsed_vars]

    return [read_row(obs) for obs in parsed_obs]
def st_view(rownums=None, varnums=None, selectvar=""):
    """Return a view onto current Stata data

    Parameters
    ----------
    rownums : int, iterable of int, None, or MissingValue instance
        optional, default value is None
        if not specified, or is None or MissingValue instance,
          a view on all observations will be returned
    varnums : int, iterable of int, None, or MissingValue instance
        optional, default value is None
        if not specified, or is None or MissingValue instance,
          a view on all Stata variables will be returned
    selectvar : string, int, None, or MissingValue instance
        optional, default value is the empty string
        if not specified, or if the empty string, all rows in
          `rownums` will be included
        if specified as an integer or non-empty string, the
          corresponding Stata variable must be numeric;
          all rows in `rownums` will be included where the
          Stata variable has non-zero values
        if specified as None or a MissingValue instance,
          all rows in `rownums` will be included where _none_ of the
          numeric `varnums` variables have missing values

    Returns
    -------
    instance of StataView class

    Raises
    ------
    TypeError
        If `rownums`/`varnums` are not int or iterables of int, or if
        `selectvar` has an unsupported type.
    IndexError
        If any row, variable, or integer `selectvar` index is out of range.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc.
    from collections.abc import Iterable

    def normalize(nums, count, label):
        # Validate one index argument and return it as a tuple of
        # non-negative ints, or None when "all" was requested.
        if st_ismissing(nums):
            return None
        if isinstance(nums, int):
            nums = (nums,)
        elif not isinstance(nums, Iterable):
            raise TypeError(label + " should be int or iterable of int")
        else:
            if not hasattr(nums, "__len__"):
                # One-shot iterables are materialised so they can be
                # traversed more than once below.
                nums = tuple(nums)
            if not all(isinstance(n, int) for n in nums):
                raise TypeError(label + " must be integers")
        if not all(-count <= n < count for n in nums):
            raise IndexError(label + " out of range")
        # Negative indices count from the end, as in Python sequences.
        return tuple(n if n >= 0 else count + n for n in nums)

    nobs = st_nobs()
    nvar = st_nvar()

    rownums = normalize(rownums, nobs, "rownums")
    # The original materialised `rownums` here instead of `varnums`, so a
    # generator passed as `varnums` produced the wrong columns.
    varnums = normalize(varnums, nvar, "varnums")

    if not selectvar == "":
        if rownums is None:
            rownums = tuple(range(nobs))

        if st_ismissing(selectvar):
            # Keep rows with no missing value in any numeric variable
            # of the view; string variables are not examined.
            candidates = range(nvar) if varnums is None else varnums
            numeric = tuple(c for c in candidates if st_isnumvar(c))
            rownums = tuple(
                r for r in rownums
                if not any(st_ismissing(_st_data(r, c)) for c in numeric)
            )
        else:
            if isinstance(selectvar, str):
                selectvar = st_varindex(selectvar, True)
            elif not isinstance(selectvar, int):
                raise TypeError("selectvar misspecified; invalid type")
            elif not -nvar <= selectvar < nvar:
                raise IndexError("selectvar index out of range")

            # Keep rows where the selection variable is non-zero.
            rownums = tuple(
                r for r in rownums if _st_data(r, selectvar) != 0
            )

    return StataView(rownums, varnums)
def st_view(rownums=None, varnums=None, selectvar=""):
    """Return a view onto current Stata data

    Parameters
    ----------
    rownums : int, iterable of int, None, or MissingValue instance
        optional, default value is None
        if not specified, or is None or MissingValue instance,
          a view on all observations will be returned
    varnums : int, iterable of int, None, or MissingValue instance
        optional, default value is None
        if not specified, or is None or MissingValue instance,
          a view on all Stata variables will be returned
    selectvar : string, int, None, or MissingValue instance
        optional, default value is the empty string
        if not specified, or if the empty string, all rows in
          `rownums` will be included
        if specified as an integer or non-empty string, the
          corresponding Stata variable must be numeric;
          all rows in `rownums` will be included where the
          Stata variable has non-zero values
        if specified as None or a MissingValue instance,
          all rows in `rownums` will be included where _none_ of the
          numeric `varnums` variables have missing values

    Returns
    -------
    instance of StataView class

    Raises
    ------
    TypeError
        If `rownums`/`varnums` are not int or iterables of int, or if
        `selectvar` has an unsupported type.
    IndexError
        If any row, variable, or integer `selectvar` index is out of range.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc.
    from collections.abc import Iterable

    def normalize(nums, count, label):
        # Validate one index argument and return it as a tuple of
        # non-negative ints, or None when "all" was requested.
        if st_ismissing(nums):
            return None
        if isinstance(nums, int):
            nums = (nums,)
        elif not isinstance(nums, Iterable):
            raise TypeError(label + " should be int or iterable of int")
        else:
            if not hasattr(nums, "__len__"):
                # One-shot iterables are materialised so they can be
                # traversed more than once below.
                nums = tuple(nums)
            if not all(isinstance(n, int) for n in nums):
                raise TypeError(label + " must be integers")
        if not all(-count <= n < count for n in nums):
            raise IndexError(label + " out of range")
        # Negative indices count from the end, as in Python sequences.
        return tuple(n if n >= 0 else count + n for n in nums)

    nobs = st_nobs()
    nvar = st_nvar()

    rownums = normalize(rownums, nobs, "rownums")
    # The original materialised `rownums` here instead of `varnums`, so a
    # generator passed as `varnums` produced the wrong columns.
    varnums = normalize(varnums, nvar, "varnums")

    if not selectvar == "":
        if rownums is None:
            rownums = tuple(range(nobs))

        if st_ismissing(selectvar):
            # Keep rows with no missing value in any numeric variable
            # of the view; string variables are not examined.
            candidates = range(nvar) if varnums is None else varnums
            numeric = tuple(c for c in candidates if st_isnumvar(c))
            rownums = tuple(
                r for r in rownums
                if not any(st_ismissing(_st_data(r, c)) for c in numeric)
            )
        else:
            if isinstance(selectvar, str):
                selectvar = st_varindex(selectvar, True)
            elif not isinstance(selectvar, int):
                raise TypeError("selectvar misspecified; invalid type")
            elif not -nvar <= selectvar < nvar:
                raise IndexError("selectvar index out of range")

            # Keep rows where the selection variable is non-zero.
            rownums = tuple(
                r for r in rownums if _st_data(r, selectvar) != 0
            )

    return StataView(rownums, varnums)