def transform(self, data):
    """Wrap boolean factor data in a two-level ``Categorical``.

    The input is converted with ``asarray_or_pandas`` and checked with
    ``_max_allowed_dim(1, ...)`` (presumably a dimensionality limit --
    confirm against that helper).  Data whose dtype kind is ``"b"`` is
    returned as ``Categorical(values, levels=[False, True])``.

    Raises:
        PatsyError: if the converted data does not have a boolean dtype.
    """
    values = asarray_or_pandas(data)
    _max_allowed_dim(1, values, self.factor)
    # Look at the dtype kind directly: issubdtype(int, bool) is true, so
    # issubdtype cannot be used to tell booleans from integers here.
    if values.dtype.kind == "b":
        return Categorical(values, levels=[False, True])
    raise PatsyError("factor %s, which I thought was boolean, "
                     "gave non-boolean data of dtype %s"
                     % (self.factor.name(), values.dtype),
                     self.factor)
def transform(self, x, center=True, rescale=True, ddof=0):
    """Center and/or rescale ``x`` using the accumulated statistics.

    Args:
        x: Input data; it is copied and converted to ``float``.
        center: When true, ``self.current_mean`` is subtracted.
        rescale: When true, the data is divided by
            ``sqrt(self.current_M2 / (self.current_n - ddof))``.
        ddof: Amount subtracted from ``self.current_n`` in the divisor.

    Returns:
        The transformed data, reshaped back to the shape of the converted
        input via ``pandas_friendly_reshape``.
    """
    # XX: every input is forced to double-precision real here, even when
    # it arrives as single-/extended-precision or complex.  Avoiding that
    # without breaking something else (e.g. needing an extra copy) turned
    # out to be too tangled.
    values = asarray_or_pandas(x, copy=True, dtype=float)
    columns = atleast_2d_column_default(values, preserve_pandas=True)
    if center:
        columns -= self.current_mean
    if rescale:
        divisor = np.sqrt(self.current_M2 / (self.current_n - ddof))
        columns /= divisor
    return pandas_friendly_reshape(columns, values.shape)
def __init__(self, int_array, levels, contrast=None):
    """Store integer codes, their levels and an optional contrast.

    Args:
        int_array: Data converted to an integer array (or pandas object)
            via ``asarray_or_pandas``.  It must be 1-dimensional, or a
            2-dimensional single-column array, which is flattened.
        levels: Iterable of levels; stored as a tuple.
        contrast: Stored unchanged on ``self.contrast``.

    Raises:
        PatsyError: if the data is neither 1-dimensional nor a column
            vector.
    """
    codes = asarray_or_pandas(int_array, dtype=int)
    self.int_array = codes
    if codes.ndim != 1:
        is_column_vector = codes.ndim == 2 and codes.shape[1] == 1
        if not is_column_vector:
            raise PatsyError("Categorical data must be 1 dimensional "
                             "or column vector")
        # Collapse an (n, 1) column down to shape (n,).
        self.int_array = pandas_friendly_reshape(codes, (codes.shape[0],))
    self.levels = tuple(levels)
    self.contrast = contrast
def transform(self, x):
    """Subtract the accumulated mean (``self._sum / self._count``) from x.

    Integer input yields a ``float`` mean; for any other dtype the mean
    is cast to the input's own dtype before subtracting.  The result is
    reshaped to the shape of the converted input.
    """
    values = asarray_or_pandas(x)
    # Per the original author: this does not copy unless the input is a
    # DataFrame with heterogeneous types, and in that case the types get
    # munged anyway, so the copy is not a concern.
    as_array = np.asarray(values)
    target_dtype = (
        float if np.issubdtype(as_array.dtype, np.integer)
        else as_array.dtype
    )
    offset = np.asarray(self._sum / self._count, dtype=target_dtype)
    as_columns = atleast_2d_column_default(values, preserve_pandas=True)
    return pandas_friendly_reshape(as_columns - offset, values.shape)
def transform(self, x):
    """Return ``x`` with the accumulated mean removed.

    The mean is ``self._sum / self._count``.  It is materialised as
    ``float`` when the input has an integer dtype (as judged by
    ``safe_issubdtype``) and in the input's own dtype otherwise.  The
    output is reshaped to match the converted input's shape.
    """
    data = asarray_or_pandas(x)
    # Per the original author: no copy happens here unless the input is a
    # DataFrame with heterogeneous types, and then the types are being
    # munged anyway, so copying isn't a big deal.
    mean_dtype = np.asarray(data).dtype
    if safe_issubdtype(mean_dtype, np.integer):
        mean_dtype = float
    mean = np.asarray(self._sum / self._count, dtype=mean_dtype)
    shifted = atleast_2d_column_default(data, preserve_pandas=True) - mean
    return pandas_friendly_reshape(shifted, data.shape)
def _do_highlevel_design(formula_like, data, eval_env, return_type):
    """Produce an ``(lhs, rhs)`` pair of design matrices.

    If ``_try_incr_builders`` yields builders for ``formula_like``, the
    result of ``build_design_matrices`` is returned directly.  Otherwise
    ``formula_like`` is treated as explicit matrix data: either a 2-tuple
    ``(lhs, rhs)`` or a single right-hand-side array-like.

    Args:
        formula_like: Formula-like object, 2-tuple of matrices, or a
            single array-like.
        data: Data handed to the builders.
        eval_env: Passed through to ``_try_incr_builders``.
        return_type: Either ``"matrix"`` or ``"dataframe"``.

    Raises:
        PatsyError: on an unsupported ``return_type``, a tuple whose
            length is not 2, mismatched row counts, or mismatched pandas
            indexes.
    """
    if return_type == "dataframe" and not have_pandas:
        raise PatsyError("pandas.DataFrame was requested, but pandas "
                         "is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError("unrecognized output type %r, should be "
                         "'matrix' or 'dataframe'" % (return_type,))

    def data_iter_maker():
        return iter([data])

    builders = _try_incr_builders(formula_like, data_iter_maker, eval_env)
    if builders is not None:
        return build_design_matrices(builders, data, return_type=return_type)

    # No builders, but explicit matrices may still have been supplied.
    if isinstance(formula_like, tuple):
        if len(formula_like) != 2:
            raise PatsyError("don't know what to do with a length %s "
                             "matrices tuple" % (len(formula_like),))
        lhs, rhs = formula_like
    else:
        lhs = None
        # subok=True lets DesignMatrix instances pass through untouched.
        rhs = asarray_or_pandas(formula_like, subok=True)

    # At this point the inputs are one of: an ndarray (or subclass), a
    # DesignMatrix, a pandas.Series, or a pandas.DataFrame.  They are
    # normalised below into one standard output format.
    want_frame = return_type == "dataframe"

    def _regularize_matrix(m, default_column_prefix):
        di = DesignInfo.from_array(m, default_column_prefix)
        orig_index = None
        if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)):
            orig_index = m.index
        if not want_frame:
            return (DesignMatrix(m, di), orig_index)
        frame = pandas.DataFrame(
            atleast_2d_column_default(m, preserve_pandas=True)
        )
        frame.columns = di.column_names
        frame.design_info = di
        return (frame, orig_index)

    rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
    if lhs is None:
        # No outcome given: use an empty matrix with matching row count.
        lhs = np.zeros((rhs.shape[0], 0), dtype=float)
    lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

    assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
    assert isinstance(getattr(rhs, "design_info", None), DesignInfo)
    if lhs.shape[0] != rhs.shape[0]:
        raise PatsyError(
            "shape mismatch: outcome matrix has %s rows, "
            "predictor matrix has %s rows" % (lhs.shape[0], rhs.shape[0])
        )

    rhs_indexed = rhs_orig_index is not None
    lhs_indexed = lhs_orig_index is not None
    if (rhs_indexed and lhs_indexed
            and not rhs_orig_index.equals(lhs_orig_index)):
        raise PatsyError("index mismatch: outcome and "
                         "predictor have incompatible indexes")
    if want_frame:
        # When only one side carried an index, share it with the other.
        if rhs_indexed and not lhs_indexed:
            lhs.index = rhs.index
        elif lhs_indexed and not rhs_indexed:
            rhs.index = lhs.index
    return (lhs, rhs)
def _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type):
    """Build the ``(lhs, rhs)`` design matrices for ``formula_like``.

    When ``_try_incr_builders`` returns builders, the matrices come from
    ``build_design_matrices`` (with ``NA_action`` forwarded).  When it
    returns ``None``, ``formula_like`` is interpreted as ready-made
    matrices -- a ``(lhs, rhs)`` 2-tuple or a lone right-hand side -- and
    normalised to the requested output type.

    Args:
        formula_like: Formula-like object, 2-tuple of matrices, or a
            single array-like.
        data: Data handed to the builders.
        eval_env: Forwarded to ``_try_incr_builders``.
        NA_action: Forwarded to ``_try_incr_builders`` and
            ``build_design_matrices``.
        return_type: ``"matrix"`` or ``"dataframe"``.

    Raises:
        PatsyError: for an unknown ``return_type``, a dataframe request
            without pandas, a tuple of length other than 2, differing
            row counts, or incompatible pandas indexes.
    """
    if return_type == "dataframe" and not have_pandas:
        raise PatsyError("pandas.DataFrame was requested, but pandas "
                         "is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError("unrecognized output type %r, should be "
                         "'matrix' or 'dataframe'" % (return_type,))

    def data_iter_maker():
        return iter([data])

    builders = _try_incr_builders(
        formula_like, data_iter_maker, eval_env, NA_action
    )
    if builders is not None:
        return build_design_matrices(
            builders, data, NA_action=NA_action, return_type=return_type
        )

    # No builders were produced; fall back to explicit matrices.
    if not isinstance(formula_like, tuple):
        # subok=True is required so DesignMatrix objects pass through.
        lhs, rhs = None, asarray_or_pandas(formula_like, subok=True)
    else:
        n_given = len(formula_like)
        if n_given != 2:
            raise PatsyError("don't know what to do with a length %s "
                             "matrices tuple" % (n_given,))
        (lhs, rhs) = formula_like

    # Explicit matrices arrive as an ndarray (or subclass), a
    # DesignMatrix, a pandas.Series, or a pandas.DataFrame; convert each
    # into the standard output format.
    def _regularize_matrix(m, default_column_prefix):
        di = DesignInfo.from_array(m, default_column_prefix)
        is_pandas = have_pandas and isinstance(
            m, (pandas.Series, pandas.DataFrame)
        )
        orig_index = m.index if is_pandas else None
        if return_type != "dataframe":
            return (DesignMatrix(m, di), orig_index)
        m = atleast_2d_column_default(m, preserve_pandas=True)
        m = pandas.DataFrame(m)
        m.columns = di.column_names
        m.design_info = di
        return (m, orig_index)

    rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
    if lhs is None:
        # Missing outcome: substitute a zero-column float matrix with the
        # same number of rows as the predictors.
        lhs = np.zeros((rhs.shape[0], 0), dtype=float)
    lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

    assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
    assert isinstance(getattr(rhs, "design_info", None), DesignInfo)

    if lhs.shape[0] != rhs.shape[0]:
        raise PatsyError("shape mismatch: outcome matrix has %s rows, "
                         "predictor matrix has %s rows"
                         % (lhs.shape[0], rhs.shape[0]))

    both_indexed = rhs_orig_index is not None and lhs_orig_index is not None
    if both_indexed and not rhs_orig_index.equals(lhs_orig_index):
        raise PatsyError("index mismatch: outcome and "
                         "predictor have incompatible indexes")

    if return_type == "dataframe":
        # Propagate the index from whichever side originally had one.
        if lhs_orig_index is None and rhs_orig_index is not None:
            lhs.index = rhs.index
        elif rhs_orig_index is None and lhs_orig_index is not None:
            rhs.index = lhs.index
    return (lhs, rhs)