Exemple #1
0
 def __str__(self):
     """Return the view rendered as a text table.

     The result starts with a two-line header giving the number of
     observations and variables; when the view holds more rows or
     columns than distinct observations or variables, the row/column
     count is appended in parentheses. If the view has no rows or no
     columns, only the header is returned.

     Otherwise one text row per entry of ``self._rownums`` follows,
     labelled ``r<obs>``, under a line of ``c<var>`` column labels.
     String columns use the width and alignment parsed from the
     column's format by ``STR_FMT_RE`` (width 11, right-aligned when
     the format does not match) and values are truncated to that width.
     Numeric columns are rendered with ``st_format``, falling back to
     ``"%9.0g"`` when the stored format is a string format.

     The text contains ``{txt}`` and ``{res}`` markers (presumably
     Stata SMCL directives -- confirm against the display code).

     Returns
     -------
     str
     """
     colnums, rownums = self._colnums, self._rownums
     nrows, nobs = self._nrows, self._nobs
     ncols, nvar = self._ncols, self._nvar
     fmts = self._formats

     nobs_str = str(nobs)
     nrow_str = "" if nrows == nobs else (" (" + str(nrows) + " rows)")
     nvar_str = str(nvar)
     ncol_str = "" if ncols == nvar else (" (" + str(ncols) + " columns)")

     # First pass fills in the shared field width ``m``; the doubled
     # braces survive it and become the placeholders of the second pass.
     header = (
         "  {{txt}}" +
         "obs: {{:>{m}}}{{}}\n vars: {{:>{m}}}{{}}".format(
             m=max((len(nobs_str), len(nvar_str)))
         )
     )
     header = header.format(nobs_str, nrow_str, nvar_str, ncol_str)

     if nrows == 0 or ncols == 0:
         return "\n" + header + "\n\n"

     # Build the formatted cells column by column.
     columns = []
     for i, c in enumerate(colnums):
         if st_isstrvar(c):
             m = STR_FMT_RE.match(fmts[i])
             width = int(m.group(3)) if m else 11
             align = "<" if m and m.group(1) == "-" else ">"
             fmt = "{:" + align + str(width) + "}"
             columns.append(
                 [fmt.format(_st_sdata(r, c)[:width]) for r in rownums]
             )
         else:
             # A string format on a numeric column cannot be used as-is.
             fmt = fmts[i] if not STR_FMT_RE.match(fmts[i]) else "%9.0g"
             columns.append(
                 [st_format(fmt, _st_data(r, c)) for r in rownums]
             )

     # Transpose: one list of cells per displayed row.
     rows = [[column[i] for column in columns] for i in range(nrows)]

     # Digits in the largest row label. Counting characters is exact,
     # whereas floor(log(n, 10)) + 1 under-counts at powers of ten
     # (e.g. log(1000, 10) evaluates to 2.9999999999999996).
     ndigits = len(str(max(rownums)))

     # One extra character of width for the leading "r".
     row_fmt = "{{txt}}{:>" + str(ndigits + 1) + "}"
     # Column labels are right-aligned to the width of the first row's cells.
     col_fmt = ["{:>" + str(len(s)) + "}" for s in rows[0]]

     for row, r in zip(rows, rownums):
         row.insert(0, row_fmt.format("r" + str(r)) + "{res}")

     rows.insert(
         0,
         [row_fmt.format("")] +
         [
             col_fmt[i].format("c" + str(v))
             for v, i in zip(colnums, range(ncols))
         ]
     )

     return ("\n" + header + "\n\n" +
             "\n".join(" ".join(row) for row in rows))
    def __init__(self, rowNums=None, colNums=None, selectVar=None):
        """Set up a view over the given observations and variables.

        Parameters
        ----------
        rowNums : sequence of int, optional
            Observation numbers; duplicates are allowed. When falsy
            (None or empty), ``range(st_nobs())`` is used.
        colNums : sequence of int, optional
            Variable indices; duplicates are allowed. When falsy
            (None or empty), ``range(st_nvar())`` is used.
        selectVar : str, int, or None, optional
            None or "" applies no row filter. A str is resolved with
            ``st_varindex(selectVar, True)``. A value of -1 keeps only
            rows in which no column of `colNums` is missing according
            to ``st_ismissing``; any other int keeps the rows where
            ``_st_data(row, selectVar) != 0``.

        Raises
        ------
        TypeError
            If `selectVar` is neither str, int, nor None.
        """
        if not rowNums:
            rowNums = tuple(range(st_nobs()))

        if not colNums:
            colNums = tuple(range(st_nvar()))

        # using set because there could be duplicates
        self._nVar = len(set(colNums))
        self._nCols = len(colNums)

        if not (selectVar is None or selectVar == ""):
            if isinstance(selectVar, str):
                selectVar = st_varindex(selectVar, True)
            elif not isinstance(selectVar, int):
                raise TypeError("selectVar should be str, int, or None")

            if selectVar == -1:
                # NOTE(review): every column is read with _st_data here,
                # string variables included -- confirm that is intended.
                rowNums = tuple(
                    r for r in rowNums
                    if not any(st_ismissing(_st_data(r, c)) for c in colNums))
            else:
                rowNums = tuple(r for r in rowNums
                                if _st_data(r, selectVar) != 0)

        # Count rows only after the selectVar filter has run; counting
        # earlier leaves the totals stale whenever rows are dropped.
        self._nObs = len(set(rowNums))
        self._nRows = len(rowNums)

        self._rowNums = rowNums
        self._colNums = colNums

        # Query each column's type once and reuse the answer below.
        isStr = [st_isstrvar(c) for c in colNums]

        self._formats = ["%11s" if s else "%9.0g" for s in isStr]
        self._getters = [_st_sdata if s else _st_data for s in isStr]
        self._setters = [_st_sstore if s else _st_store for s in isStr]
def st_data(obs, cols):
    """Collect numeric values for the given observations and columns.

    `obs` should be an int or an iterable of int; `cols` should be a
    single str or int, or an iterable of str or int. The result is a
    list of lists, one sub-list per observation.

    Raises TypeError if any requested column is not a numeric Stata
    variable.
    """
    row_ids, col_ids, _unused = _parseObsColsVals(obs, cols)

    # Reject the request before reading anything if a column is non-numeric.
    for col in col_ids:
        if not st_isnumvar(col):
            raise TypeError("only numeric Stata variables allowed")

    table = []
    for row in row_ids:
        table.append([_st_data(row, col) for col in col_ids])
    return table
Exemple #4
0
 def __repr__(self):
     """Return the view rendered as a text table.

     The result starts with a two-line header giving the number of
     observations and variables; when the view holds more rows or
     columns than distinct observations or variables, the row/column
     count is appended in parentheses. If the view has no rows or no
     columns, only the header is returned.

     Otherwise one text row per entry of ``self._rowNums`` follows,
     labelled ``r<obs>``, under a line of ``c<var>`` column labels.
     String columns use the width and alignment parsed from the
     column's format by ``STR_FMT_RE`` (width 11, right-aligned when
     the format does not match) and values are truncated to that width.
     Numeric columns are rendered with ``st_format``, falling back to
     ``"%9.0g"`` when the stored format is a string format.

     The text contains ``{txt}`` and ``{res}`` markers (presumably
     Stata SMCL directives -- confirm against the display code).

     Returns
     -------
     str
     """
     colNums, rowNums = self._colNums, self._rowNums
     nRows, nObs = self._nRows, self._nObs
     nCols, nVar = self._nCols, self._nVar
     fmts = self._formats

     nObsStr = str(nObs)
     nRowStr = "" if nRows == nObs else (" (" + str(nRows) + " rows)")
     nVarStr = str(nVar)
     nColStr = "" if nCols == nVar else (" (" + str(nCols) + " columns)")

     # First pass fills in the shared field width ``m``; the doubled
     # braces survive it and become the placeholders of the second pass.
     header = ("  {{txt}}" +
               "obs: {{:>{m}}}{{}}\n vars: {{:>{m}}}{{}}".format(
                     m=max((len(nObsStr), len(nVarStr))))
               )
     header = header.format(nObsStr, nRowStr, nVarStr, nColStr)

     if nRows == 0 or nCols == 0:
         return "\n" + header + "\n\n"

     # Build the formatted cells column by column.
     colStrs = []
     for i, c in enumerate(colNums):
         if st_isstrvar(c):
             m = STR_FMT_RE.match(fmts[i])
             width = int(m.group(3)) if m else 11
             align = "<" if m and m.group(1) == "-" else ">"
             fmt = "{:" + align + str(width) + "}"
             colStrs.append(
                 [fmt.format(_st_sdata(r, c)[:width]) for r in rowNums]
             )
         else:
             # A string format on a numeric column cannot be used as-is.
             fmt = fmts[i] if not STR_FMT_RE.match(fmts[i]) else "%9.0g"
             colStrs.append(
                 [st_format(fmt, _st_data(r, c)) for r in rowNums]
             )

     # Transpose: one list of cells per displayed row.
     strList = [[inner[i] for inner in colStrs] for i in range(nRows)]

     # Digits in the largest row label. Counting characters is exact,
     # whereas floor(log(n, 10)) + 1 under-counts at powers of ten
     # (e.g. log(1000, 10) evaluates to 2.9999999999999996).
     nDigits = len(str(max(rowNums)))

     # One extra character of width for the leading "r".
     rowFmt = "{{txt}}{:>" + str(nDigits + 1) + "}"
     # Column labels are right-aligned to the width of the first row's cells.
     colFmt = ["{:>" + str(len(s)) + "}" for s in strList[0]]

     for row, r in zip(strList, rowNums):
         row.insert(0, rowFmt.format("r" + str(r)) + "{res}")

     strList.insert(0,
                    [rowFmt.format("")] +
                    [colFmt[i].format("c" + str(v))
                                for v, i in zip(colNums, range(nCols))])

     return ("\n" + header + "\n\n" +
             "\n".join(" ".join(row) for row in strList))
Exemple #5
0
    def __str__(self):
        """Return the view rendered as a text table.

        The result starts with a two-line header giving the number of
        observations and variables; when the view holds more rows or
        columns than distinct observations or variables, the row/column
        count is appended in parentheses. If the view has no rows or no
        columns, only the header is returned.

        Otherwise one text row per entry of ``self._rownums`` follows,
        labelled ``r<obs>``, under a line of ``c<var>`` column labels.
        String columns use the width and alignment parsed from the
        column's format by ``STR_FMT_RE`` (width 11, right-aligned when
        the format does not match) and values are truncated to that
        width. Numeric columns are rendered with ``st_format``, falling
        back to ``"%9.0g"`` when the stored format is a string format.

        The text contains ``{txt}`` and ``{res}`` markers (presumably
        Stata SMCL directives -- confirm against the display code).

        Returns
        -------
        str
        """
        colnums, rownums = self._colnums, self._rownums
        nrows, nobs = self._nrows, self._nobs
        ncols, nvar = self._ncols, self._nvar
        fmts = self._formats

        nobs_str = str(nobs)
        nrow_str = "" if nrows == nobs else (" (" + str(nrows) + " rows)")
        nvar_str = str(nvar)
        ncol_str = "" if ncols == nvar else (" (" + str(ncols) + " columns)")

        # First pass fills in the shared field width ``m``; the doubled
        # braces survive it and become the placeholders of the second pass.
        header = ("  {{txt}}" +
                  "obs: {{:>{m}}}{{}}\n vars: {{:>{m}}}{{}}".format(
                      m=max((len(nobs_str), len(nvar_str)))))
        header = header.format(nobs_str, nrow_str, nvar_str, ncol_str)

        if nrows == 0 or ncols == 0:
            return "\n" + header + "\n\n"

        # Build the formatted cells column by column.
        columns = []
        for i, c in enumerate(colnums):
            if st_isstrvar(c):
                m = STR_FMT_RE.match(fmts[i])
                width = int(m.group(3)) if m else 11
                align = "<" if m and m.group(1) == "-" else ">"
                fmt = "{:" + align + str(width) + "}"
                columns.append(
                    [fmt.format(_st_sdata(r, c)[:width]) for r in rownums])
            else:
                # A string format on a numeric column cannot be used as-is.
                fmt = fmts[i] if not STR_FMT_RE.match(fmts[i]) else "%9.0g"
                columns.append(
                    [st_format(fmt, _st_data(r, c)) for r in rownums])

        # Transpose: one list of cells per displayed row.
        rows = [[column[i] for column in columns] for i in range(nrows)]

        # Digits in the largest row label. Counting characters is exact,
        # whereas floor(log(n, 10)) + 1 under-counts at powers of ten
        # (e.g. log(1000, 10) evaluates to 2.9999999999999996).
        ndigits = len(str(max(rownums)))

        # One extra character of width for the leading "r".
        row_fmt = "{{txt}}{:>" + str(ndigits + 1) + "}"
        # Column labels are right-aligned to the width of the first
        # row's cells.
        col_fmt = ["{:>" + str(len(s)) + "}" for s in rows[0]]

        for row, r in zip(rows, rownums):
            row.insert(0, row_fmt.format("r" + str(r)) + "{res}")

        rows.insert(0, [row_fmt.format("")] + [
            col_fmt[i].format("c" + str(v))
            for v, i in zip(colnums, range(ncols))
        ])

        return ("\n" + header + "\n\n" +
                "\n".join(" ".join(row) for row in rows))
Exemple #6
0
 def get(self, rownum, colnum):
     """Fetch one data value through the view.

     Parameters
     ----------
     rownum : int
         Stata observation number
     colnum : int
         Stata variable index

     Returns
     -------
     string, float, or MissingValue instance, depending
     on data type of Stata variable

     """
     # Choose the reader that matches the variable's storage type.
     reader = _st_sdata if st_isstrvar(colnum) else _st_data
     return reader(rownum, colnum)
Exemple #7
0
 def get(self, rownum, colnum):
     """Read a single value from the view.

     Parameters
     ----------
     rownum : int
         Stata observation number
     colnum : int
         Stata variable index

     Returns
     -------
     string, float, or MissingValue instance, depending
     on data type of Stata variable

     """
     # Non-string variables go through the numeric reader.
     if not st_isstrvar(colnum):
         return _st_data(rownum, colnum)
     return _st_sdata(rownum, colnum)
Exemple #8
0
def st_data(obsnums, vars):
    """Fetch numeric values for the requested observations and variables.

    Parameters
    ----------
    obsnums : int or iterable of int
    vars : int, str, or iterable of int or str
        integers denote column numbers
        strings should be Stata variable names or
          unambiguous abbreviations

    Returns
    -------
    List of lists of float or MissingValue,
    one sub-list for each observation

    Raises
    ------
    TypeError
        If any requested variable is not numeric.

    """
    row_ids, col_ids, _unused = _parse_obs_cols_vals(obsnums, vars)

    # Validate every variable before reading any data.
    for col in col_ids:
        if not st_isnumvar(col):
            raise TypeError("only numeric Stata variables allowed")

    data = []
    for row in row_ids:
        data.append([_st_data(row, col) for col in col_ids])
    return data
Exemple #9
0
def st_data(obsnums, vars):
    """Read numeric Stata data for the given observations and variables.

    Parameters
    ----------
    obsnums : int or iterable of int
    vars : int, str, or iterable of int or str
        integers denote column numbers
        strings should be Stata variable names or
          unambiguous abbreviations

    Returns
    -------
    List of lists of float or MissingValue,
    one sub-list for each observation

    Raises
    ------
    TypeError
        If any requested variable is not numeric.

    """
    observations, variables, _ignored = _parse_obs_cols_vals(obsnums, vars)

    if any(not st_isnumvar(v) for v in variables):
        raise TypeError("only numeric Stata variables allowed")

    def read_row(obs):
        # One value per requested variable, in the order given.
        return [_st_data(obs, v) for v in variables]

    return [read_row(obs) for obs in observations]
Exemple #10
0
def st_view(rownums=None, varnums=None, selectvar=""):
    """Return a view onto current Stata data
    
    Parameters
    ----------
    rownums : int, iterable of int, None, or MissingValue instance
        optional
        default value is None
        if not specified, or is None or MissingValue instance,
            a view on all observations will be returned
    varnums : int, iterable of int, None, or MissingValue instance
        optional
        default value is None
        if not specified, or is None or MissingValue instance,
            a view on all Stata variables will be returned
    selectvar : string, int, None, or MissingValue instance
        optional
        default value is the empty string
        if not specified, or if the empty string, all rows in
            `rownums` will be included
        if specified as an integer or non-empty string, the
            corresponding Stata variable must be numeric;
            all rows in `rownums` will be included where the
            Stata variable has non-zero values
        if specified as None or a MissingValue instance, all
            rows in `rownums` will be included where _none_ 
            of the `varnums` variables have missing values
    
    Returns
    -------
    instance of StataView class    
    
    Raises
    ------
    TypeError
        if `rownums` or `varnums` is neither an int nor an iterable
        of int, or if `selectvar` has an unsupported type
    IndexError
        if an element of an iterable `rownums` or `varnums`, or an
        integer `selectvar`, is out of range
    
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
    # in ``collections.abc``. Imported locally so the block is
    # self-contained.
    from collections.abc import Iterable
    
    nobs = st_nobs()
    nvar = st_nvar()
    
    if not st_ismissing(rownums):
        if isinstance(rownums, int):
            # NOTE(review): a lone int is wrapped without range checking
            # or negative-index normalization -- confirm this is intended.
            rownums = (rownums,)
        elif not isinstance(rownums, Iterable):
            raise TypeError("rownums should be int or iterable of int")
        else:
            if not hasattr(rownums, "__len__"):  # a test for persistence
                rownums = tuple(rownums)
            if not all(isinstance(r, int) for r in rownums):
                raise TypeError("rownums must be integers")
            if not all(-nobs <= r < nobs for r in rownums):
                raise IndexError("rownums out of range")
            # Map negative indices onto their non-negative equivalents.
            rownums = tuple(r if r >= 0 else nobs + r for r in rownums)
    else:
        rownums = None
    
    if not st_ismissing(varnums):
        if isinstance(varnums, int):
            varnums = (varnums,)
        elif not isinstance(varnums, Iterable):
            raise TypeError("varnums should be int or iterable of int")
        else:
            if not hasattr(varnums, "__len__"):
                # Materialize one-shot iterables so the checks below do
                # not exhaust them (previously this copied `rownums`).
                varnums = tuple(varnums)
            if not all(isinstance(c, int) for c in varnums):
                raise TypeError("varnums must be integers")
            if not all(-nvar <= c < nvar for c in varnums):
                raise IndexError("varnums out of range")
            varnums = tuple(c if c >= 0 else nvar + c for c in varnums)
    else:
        varnums = None
            
    if not selectvar == "":        
        if rownums is None:
            rownums = tuple(range(nobs))
        
        if st_ismissing(selectvar):
            # Only numeric variables are examined for missing values.
            numeric = tuple(
                c for c in (range(nvar) if varnums is None else varnums)
                if st_isnumvar(c)
            )
            rownums = tuple(
                r for r in rownums
                if not any(st_ismissing(_st_data(r, c)) for c in numeric)
            )
        else:
            if isinstance(selectvar, str):
                selectvar = st_varindex(selectvar, True)
            elif not isinstance(selectvar, int):
                raise TypeError("selectvar misspecified; invalid type")
            elif not -nvar <= selectvar < nvar:
                raise IndexError("selectvar index out of range")
            rownums = tuple(
                r for r in rownums if _st_data(r, selectvar) != 0
            )
            
    return StataView(rownums, varnums)
Exemple #11
0
def st_view(rownums=None, varnums=None, selectvar=""):
    """Return a view onto current Stata data
    
    Parameters
    ----------
    rownums : int, iterable of int, None, or MissingValue instance
        optional
        default value is None
        if not specified, or is None or MissingValue instance,
            a view on all observations will be returned
    varnums : int, iterable of int, None, or MissingValue instance
        optional
        default value is None
        if not specified, or is None or MissingValue instance,
            a view on all Stata variables will be returned
    selectvar : string, int, None, or MissingValue instance
        optional
        default value is the empty string
        if not specified, or if the empty string, all rows in
            `rownums` will be included
        if specified as an integer or non-empty string, the
            corresponding Stata variable must be numeric;
            all rows in `rownums` will be included where the
            Stata variable has non-zero values
        if specified as None or a MissingValue instance, all
            rows in `rownums` will be included where _none_ 
            of the `varnums` variables have missing values
    
    Returns
    -------
    instance of StataView class    
    
    Raises
    ------
    TypeError
        if `rownums` or `varnums` is neither an int nor an iterable
        of int, or if `selectvar` has an unsupported type
    IndexError
        if an element of an iterable `rownums` or `varnums`, or an
        integer `selectvar`, is out of range
    
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
    # in ``collections.abc``. Imported locally so the block is
    # self-contained.
    from collections.abc import Iterable

    nobs = st_nobs()
    nvar = st_nvar()

    if not st_ismissing(rownums):
        if isinstance(rownums, int):
            # NOTE(review): a lone int is wrapped without range checking
            # or negative-index normalization -- confirm this is intended.
            rownums = (rownums, )
        elif not isinstance(rownums, Iterable):
            raise TypeError("rownums should be int or iterable of int")
        else:
            if not hasattr(rownums, "__len__"):  # a test for persistence
                rownums = tuple(rownums)
            if not all(isinstance(r, int) for r in rownums):
                raise TypeError("rownums must be integers")
            if not all(-nobs <= r < nobs for r in rownums):
                raise IndexError("rownums out of range")
            # Map negative indices onto their non-negative equivalents.
            rownums = tuple(r if r >= 0 else nobs + r for r in rownums)
    else:
        rownums = None

    if not st_ismissing(varnums):
        if isinstance(varnums, int):
            varnums = (varnums, )
        elif not isinstance(varnums, Iterable):
            raise TypeError("varnums should be int or iterable of int")
        else:
            if not hasattr(varnums, "__len__"):
                # Materialize one-shot iterables so the checks below do
                # not exhaust them (previously this copied `rownums`).
                varnums = tuple(varnums)
            if not all(isinstance(c, int) for c in varnums):
                raise TypeError("varnums must be integers")
            if not all(-nvar <= c < nvar for c in varnums):
                raise IndexError("varnums out of range")
            varnums = tuple(c if c >= 0 else nvar + c for c in varnums)
    else:
        varnums = None

    if not selectvar == "":
        if rownums is None:
            rownums = tuple(range(nobs))

        if st_ismissing(selectvar):
            # Only numeric variables are examined for missing values.
            numeric = tuple(
                c for c in (range(nvar) if varnums is None else varnums)
                if st_isnumvar(c))
            rownums = tuple(
                r for r in rownums
                if not any(st_ismissing(_st_data(r, c)) for c in numeric))
        else:
            if isinstance(selectvar, str):
                selectvar = st_varindex(selectvar, True)
            elif not isinstance(selectvar, int):
                raise TypeError("selectvar misspecified; invalid type")
            elif not -nvar <= selectvar < nvar:
                raise IndexError("selectvar index out of range")
            rownums = tuple(r for r in rownums if _st_data(r, selectvar) != 0)

    return StataView(rownums, varnums)