예제 #1
0
파일: build.py 프로젝트: guyrt/patsy
 def transform(self, data):
     """Wrap boolean factor data in a ``Categorical`` with levels False/True.

     :arg data: the evaluated factor values; passed through
       ``asarray_or_pandas`` before being checked.
     :returns: ``Categorical(data, levels=[False, True])``.
     :raises PatsyError: if the dtype kind of the converted data is not
       ``"b"``.
     """
     values = asarray_or_pandas(data)
     _max_allowed_dim(1, values, self.factor)
     # The dtype kind code is compared directly because, as the original
     # author noted, issubdtype(int, bool) is true and so cannot be used.
     is_boolean = values.dtype.kind == "b"
     if not is_boolean:
         message = ("factor %s, which I thought was boolean, "
                    "gave non-boolean data of dtype %s"
                    % (self.factor.name(), values.dtype))
         raise PatsyError(message, self.factor)
     return Categorical(values, levels=[False, True])
예제 #2
0
파일: state.py 프로젝트: Zaharid/patsy
 def transform(self, x, center=True, rescale=True, ddof=0):
     """Center and/or rescale ``x`` using the statistics stored on ``self``.

     :arg x: input data; it is copied and converted to ``float``.
     :arg center: when true, ``self.current_mean`` is subtracted.
     :arg rescale: when true, the data is divided by
       ``sqrt(self.current_M2 / (self.current_n - ddof))``.
     :arg ddof: value subtracted from ``self.current_n`` in the divisor.
     :returns: the transformed data reshaped back to the shape of the
       converted input.
     """
     # XX: every input is forced to double-precision real here, even when
     # it arrives as single-/extended-precision or complex. The original
     # author could not avoid that without breaking something else (e.g.
     # by requiring an extra copy).
     converted = asarray_or_pandas(x, copy=True, dtype=float)
     columns = atleast_2d_column_default(converted, preserve_pandas=True)
     if center:
         columns -= self.current_mean
     if rescale:
         variance = self.current_M2 / (self.current_n - ddof)
         columns /= np.sqrt(variance)
     return pandas_friendly_reshape(columns, converted.shape)
예제 #3
0
 def transform(self, x, center=True, rescale=True, ddof=0):
     """Apply optional centering and rescaling to ``x``.

     The input is copied and converted to ``float``. With ``center`` set,
     ``self.current_mean`` is subtracted; with ``rescale`` set, the result
     is divided by ``sqrt(self.current_M2 / (self.current_n - ddof))``.
     The output is reshaped to the shape of the converted input.
     """
     # XX: this coerces all inputs to double-precision real, including
     # single-/extended-precision or complex input. Per the original
     # author, doing otherwise broke something else (e.g. it required an
     # extra copy).
     data = asarray_or_pandas(x, copy=True, dtype=float)
     data_2d = atleast_2d_column_default(data, preserve_pandas=True)
     if center:
         data_2d -= self.current_mean
     if rescale:
         scale = np.sqrt(self.current_M2 / (self.current_n - ddof))
         data_2d /= scale
     original_shape = data.shape
     return pandas_friendly_reshape(data_2d, original_shape)
예제 #4
0
파일: categorical.py 프로젝트: guyrt/patsy
 def __init__(self, int_array, levels, contrast=None):
     """Store integer codes, their levels, and an optional contrast.

     :arg int_array: converted with ``asarray_or_pandas(..., dtype=int)``;
       must be 1-dimensional, or 2-dimensional with exactly one column (in
       which case it is reshaped to 1 dimension).
     :arg levels: iterable of levels; stored as a tuple.
     :arg contrast: stored unchanged on ``self.contrast``.
     :raises PatsyError: if ``int_array`` has any other dimensionality.
     """
     codes = asarray_or_pandas(int_array, dtype=int)
     self.int_array = codes
     if codes.ndim != 1:
         is_single_column = codes.ndim == 2 and codes.shape[1] == 1
         if not is_single_column:
             raise PatsyError("Categorical data must be 1 dimensional "
                                 "or column vector")
         # Collapse an (n, 1) column vector down to shape (n,).
         flat_shape = (codes.shape[0],)
         self.int_array = pandas_friendly_reshape(codes, flat_shape)
     self.levels = tuple(levels)
     self.contrast = contrast
예제 #5
0
파일: state.py 프로젝트: Zaharid/patsy
 def transform(self, x):
     """Subtract the stored mean (``self._sum / self._count``) from ``x``.

     Integer input yields a ``float`` mean; any other input keeps its own
     dtype for the mean. The result is reshaped to the shape of the
     converted input.
     """
     x = asarray_or_pandas(x)
     # Per the original author, np.asarray only copies here when the input
     # is a DataFrame with heterogeneous types, and in that case the types
     # get munged anyway, so the copy is not a big deal.
     plain = np.asarray(x)
     dt = float if np.issubdtype(plain.dtype, np.integer) else plain.dtype
     mean_val = np.asarray(self._sum / self._count, dtype=dt)
     columns = atleast_2d_column_default(x, preserve_pandas=True)
     return pandas_friendly_reshape(columns - mean_val, x.shape)
예제 #6
0
 def transform(self, x):
     """Center ``x`` by subtracting ``self._sum / self._count``.

     The mean is materialized as ``float`` when the input dtype is an
     integer type (as judged by ``safe_issubdtype``), otherwise in the
     input's own dtype. The output is reshaped to the shape of the
     converted input.
     """
     x = asarray_or_pandas(x)
     # Per the original author, this conversion copies only when the input
     # is a DataFrame with heterogeneous types; the types are going to be
     # munged in that case anyway, so the copy is not a big deal.
     as_ndarray = np.asarray(x)
     is_integer = safe_issubdtype(as_ndarray.dtype, np.integer)
     dt = float if is_integer else as_ndarray.dtype
     mean_val = np.asarray(self._sum / self._count, dtype=dt)
     x_2d = atleast_2d_column_default(x, preserve_pandas=True)
     shifted = x_2d - mean_val
     return pandas_friendly_reshape(shifted, x.shape)
예제 #7
0
def _do_highlevel_design(formula_like, data, eval_env, return_type):
    """Produce an ``(outcome, predictor)`` pair of design matrices.

    If ``_try_incr_builders`` yields builders, the result of
    ``build_design_matrices`` is returned directly. Otherwise
    ``formula_like`` is treated as an explicit matrix, or as a 2-tuple of
    ``(lhs, rhs)`` matrices, and both sides are normalized to the requested
    output type.

    :arg formula_like: a formula-like object, an explicit matrix, or a
      2-tuple of matrices.
    :arg data: passed to the builders (wrapped in a one-element iterator
      for ``_try_incr_builders``).
    :arg eval_env: forwarded to ``_try_incr_builders``.
    :arg return_type: ``"matrix"`` or ``"dataframe"``.
    :returns: on the explicit-matrix path, a tuple ``(lhs, rhs)``.
    :raises PatsyError: if ``return_type`` is unrecognized, a dataframe is
      requested without pandas, a tuple of the wrong length is given, or
      the two sides disagree on row count or index.
    """
    wants_dataframe = return_type == "dataframe"
    if wants_dataframe and not have_pandas:
        raise PatsyError(
            "pandas.DataFrame was requested, but pandas "
            "is not installed"
        )
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError(
            "unrecognized output type %r, should be "
            "'matrix' or 'dataframe'" % (return_type,)
        )

    def data_iter_maker():
        return iter([data])

    builders = _try_incr_builders(formula_like, data_iter_maker, eval_env)
    if builders is not None:
        return build_design_matrices(builders, data, return_type=return_type)

    # No builders, but explicit matrices may still have been supplied.
    if isinstance(formula_like, tuple):
        if len(formula_like) != 2:
            raise PatsyError(
                "don't know what to do with a length %s "
                "matrices tuple" % (len(formula_like),)
            )
        outcome, predictors = formula_like
    else:
        outcome = None
        # subok=True is needed so that DesignMatrix objects pass through.
        predictors = asarray_or_pandas(formula_like, subok=True)

    # At this point the explicit matrices are in one of these forms:
    #   -- an ndarray or subclass
    #   -- a DesignMatrix
    #   -- a pandas.Series
    #   -- a pandas.DataFrame
    # and they must be converted to a standard output format.
    def _regularize_matrix(m, default_column_prefix):
        di = DesignInfo.from_array(m, default_column_prefix)
        is_pandas = have_pandas and isinstance(
            m, (pandas.Series, pandas.DataFrame)
        )
        orig_index = m.index if is_pandas else None
        if not wants_dataframe:
            return (DesignMatrix(m, di), orig_index)
        frame = pandas.DataFrame(
            atleast_2d_column_default(m, preserve_pandas=True)
        )
        frame.columns = di.column_names
        frame.design_info = di
        return (frame, orig_index)

    predictors, predictors_index = _regularize_matrix(predictors, "x")
    if outcome is None:
        # No outcome given: use a zero-column matrix with matching rows.
        outcome = np.zeros((predictors.shape[0], 0), dtype=float)
    outcome, outcome_index = _regularize_matrix(outcome, "y")

    assert isinstance(getattr(outcome, "design_info", None), DesignInfo)
    assert isinstance(getattr(predictors, "design_info", None), DesignInfo)
    if outcome.shape[0] != predictors.shape[0]:
        raise PatsyError(
            "shape mismatch: outcome matrix has %s rows, "
            "predictor matrix has %s rows"
            % (outcome.shape[0], predictors.shape[0])
        )
    both_indexed = predictors_index is not None and outcome_index is not None
    if both_indexed and not predictors_index.equals(outcome_index):
        raise PatsyError(
            "index mismatch: outcome and "
            "predictor have incompatible indexes"
        )
    if wants_dataframe:
        # When only one side came with an index, copy it to the other side.
        if predictors_index is not None and outcome_index is None:
            outcome.index = predictors.index
        elif predictors_index is None and outcome_index is not None:
            predictors.index = outcome.index
    return (outcome, predictors)
예제 #8
0
def _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type):
    """Build the ``(lhs, rhs)`` design matrices for ``formula_like``.

    Builders from ``_try_incr_builders`` are used when available, in which
    case the result of ``build_design_matrices`` is returned as-is.
    Otherwise ``formula_like`` must be an explicit matrix or a 2-tuple of
    matrices, which are normalized to the requested output type.

    :arg formula_like: a formula-like object, an explicit matrix, or a
      2-tuple of matrices.
    :arg data: passed to the builders (wrapped in a one-element iterator
      for ``_try_incr_builders``).
    :arg eval_env: forwarded to ``_try_incr_builders``.
    :arg NA_action: forwarded to ``_try_incr_builders`` and
      ``build_design_matrices``; the explicit-matrix path in this function
      does not use it.
    :arg return_type: ``"matrix"`` or ``"dataframe"``.
    :returns: on the explicit-matrix path, a tuple ``(lhs, rhs)``.
    :raises PatsyError: if ``return_type`` is unrecognized, a dataframe is
      requested without pandas, a tuple of the wrong length is given, or
      the two sides disagree on row count or index.
    """
    as_dataframe = return_type == "dataframe"
    if as_dataframe and not have_pandas:
        raise PatsyError("pandas.DataFrame was requested, but pandas "
                         "is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError("unrecognized output type %r, should be "
                         "'matrix' or 'dataframe'" % (return_type, ))

    def data_iter_maker():
        return iter([data])

    builders = _try_incr_builders(
        formula_like, data_iter_maker, eval_env, NA_action)
    if builders is not None:
        return build_design_matrices(
            builders, data, NA_action=NA_action, return_type=return_type)

    # No builders were produced; fall back to explicit matrices.
    if isinstance(formula_like, tuple):
        n_parts = len(formula_like)
        if n_parts != 2:
            raise PatsyError("don't know what to do with a length %s "
                             "matrices tuple" % (n_parts, ))
        lhs, rhs = formula_like
    else:
        # subok=True lets DesignMatrix instances pass through.
        lhs = None
        rhs = asarray_or_pandas(formula_like, subok=True)

    # The explicit matrices are currently in one of these forms:
    #   -- an ndarray or subclass
    #   -- a DesignMatrix
    #   -- a pandas.Series
    #   -- a pandas.DataFrame
    # and must be turned into a standard output format.
    def _regularize_matrix(m, default_column_prefix):
        di = DesignInfo.from_array(m, default_column_prefix)
        orig_index = None
        if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)):
            orig_index = m.index
        if as_dataframe:
            m_2d = atleast_2d_column_default(m, preserve_pandas=True)
            frame = pandas.DataFrame(m_2d)
            frame.columns = di.column_names
            frame.design_info = di
            return (frame, orig_index)
        return (DesignMatrix(m, di), orig_index)

    rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
    if lhs is None:
        # No left-hand side supplied: use a zero-column matrix with as
        # many rows as the right-hand side.
        lhs = np.zeros((rhs.shape[0], 0), dtype=float)
    lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

    assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
    assert isinstance(getattr(rhs, "design_info", None), DesignInfo)
    if lhs.shape[0] != rhs.shape[0]:
        raise PatsyError("shape mismatch: outcome matrix has %s rows, "
                         "predictor matrix has %s rows" %
                         (lhs.shape[0], rhs.shape[0]))
    have_rhs_index = rhs_orig_index is not None
    have_lhs_index = lhs_orig_index is not None
    if have_rhs_index and have_lhs_index:
        if not rhs_orig_index.equals(lhs_orig_index):
            raise PatsyError("index mismatch: outcome and "
                             "predictor have incompatible indexes")
    if as_dataframe:
        # If exactly one side carried an index, share it with the other.
        if have_rhs_index and not have_lhs_index:
            lhs.index = rhs.index
        elif have_lhs_index and not have_rhs_index:
            rhs.index = lhs.index
    return (lhs, rhs)