def from_formula(
    cls, formula, data, window, weights=None, subset=None, *args, **kwargs
):
    """
    Create a model from a formula and a dataframe.

    Parameters
    ----------
    formula : str or generic Formula object
        The model specification, handed to ``patsy.dmatrices``.
    data : array_like
        The data for the model. When ``subset`` is given it is indexed as
        ``data.loc[subset]``, so it needs a pandas-style ``.loc`` accessor
        in that case.
    window : object
        Forwarded unchanged to ``cls`` as the ``window`` keyword
        (presumably the rolling window length -- confirm against ``cls``).
    weights : array_like, optional
        Forwarded to ``cls`` as the ``weights`` keyword only when not None.
    subset : array_like, optional
        Row selector applied through ``data.loc`` before the design
        matrices are built.
    *args
        Extra positional arguments passed on to ``cls``.
    **kwargs
        Extra keyword arguments passed on to ``cls``. Two keys are handled
        here: ``eval_env`` is removed and used for formula evaluation, and
        ``missing`` defaults to ``"skip"`` when absent.

        ``eval_env`` may be ``None`` (depth 2 is used), ``-1`` (a clean,
        empty ``patsy.EvalEnvironment``), an integer stack depth (increased
        by one before being given to patsy), or a ``patsy.EvalEnvironment``
        instance, which is passed to patsy as-is.

    Returns
    -------
    model
        The instance returned by ``cls(endog, exog, *args, **kwargs)`` with
        ``formula`` and ``data.frame`` attributes set.

    Raises
    ------
    ValueError
        If the left-hand side of the formula evaluates to more than one
        column.
    """
    from patsy import EvalEnvironment, NAAction, dmatrices

    if subset is not None:
        data = data.loc[subset]

    eval_env = kwargs.pop("eval_env", None)
    if eval_env is None:
        eval_env = 2
    elif eval_env == -1:
        eval_env = EvalEnvironment({})
    elif not isinstance(eval_env, EvalEnvironment):
        # An explicit environment has no stack depth to adjust; applying
        # ``+= 1`` to it would raise TypeError. Everything else is treated
        # as a depth, exactly as before.
        eval_env += 1  # we're going down the stack again

    missing = kwargs.get("missing", "skip")

    # No NA types are registered, so patsy does not classify any value as
    # missing here; missing-data handling is passed on to the model through
    # the ``missing`` keyword.
    na_action = NAAction(on_NA="raise", NA_types=[])
    # NOTE: keep this call in this frame -- integer ``eval_env`` values are
    # stack depths that patsy resolves relative to the caller of dmatrices.
    result = dmatrices(
        formula,
        data,
        eval_env,
        return_type="dataframe",
        NA_action=na_action,
    )

    endog, exog = result
    if (endog.ndim > 1 and endog.shape[1] > 1) or endog.ndim > 2:
        raise ValueError("endog has evaluated to an array with multiple "
                         "columns that has shape {0}. This occurs when "
                         "the variable converted to endog is non-numeric"
                         " (e.g., bool or str).".format(endog.shape))

    kwargs.update({"missing": missing, "window": window})
    if weights is not None:
        kwargs["weights"] = weights
    mod = cls(endog, exog, *args, **kwargs)
    mod.formula = formula
    # since we got a dataframe, attach the original
    mod.data.frame = data
    return mod
def from_formula(cls, formula, data, window, weights=None, subset=None,
                 *args, **kwargs):
    """
    Build a model instance from a patsy formula and a dataframe.

    Parameters
    ----------
    formula : str or generic Formula object
        Formula describing the model; evaluated by ``patsy.dmatrices``.
    data : array_like
        Source data. Restricted to ``data.loc[subset]`` when ``subset`` is
        not None, which requires a pandas-style ``.loc`` accessor.
    window : object
        Passed through to ``cls`` under the keyword ``window`` (presumably
        the rolling window length -- confirm against ``cls``).
    weights : array_like, optional
        Passed through to ``cls`` under the keyword ``weights`` unless it
        is None.
    subset : array_like, optional
        Selector used with ``data.loc`` prior to building the design
        matrices.
    *args
        Additional positional arguments for ``cls``.
    **kwargs
        Additional keyword arguments for ``cls``. ``eval_env`` is popped
        and consumed here; ``missing`` is filled in with ``'skip'`` when
        the caller did not supply it.

        Accepted ``eval_env`` values: ``None`` (depth 2), ``-1`` (an empty
        ``patsy.EvalEnvironment``), an integer depth (incremented by one),
        or an existing ``patsy.EvalEnvironment`` (used unchanged).

    Returns
    -------
    model
        Result of ``cls(endog, exog, *args, **kwargs)``, with its
        ``formula`` attribute and ``data.frame`` attribute assigned.

    Raises
    ------
    ValueError
        When the formula's left-hand side produces more than one column.
    """
    from patsy import EvalEnvironment, NAAction, dmatrices

    if subset is not None:
        data = data.loc[subset]

    eval_env = kwargs.pop('eval_env', None)
    if eval_env is None:
        eval_env = 2
    elif eval_env == -1:
        eval_env = EvalEnvironment({})
    elif not isinstance(eval_env, EvalEnvironment):
        # Only depth-like values are shifted. A concrete EvalEnvironment is
        # already resolved, and ``+= 1`` on it would raise TypeError.
        eval_env += 1  # we're going down the stack again

    missing = kwargs.get('missing', 'skip')

    # With an empty NA_types list patsy flags nothing as missing, so rows
    # are left for the model to handle according to ``missing``.
    na_action = NAAction(on_NA='raise', NA_types=[])
    # NOTE: dmatrices has to be invoked from this frame; an integer
    # ``eval_env`` is a stack depth counted from the caller of dmatrices.
    result = dmatrices(formula, data, eval_env, return_type='dataframe',
                       NA_action=na_action)

    endog, exog = result
    if (endog.ndim > 1 and endog.shape[1] > 1) or endog.ndim > 2:
        raise ValueError('endog has evaluated to an array with multiple '
                         'columns that has shape {0}. This occurs when '
                         'the variable converted to endog is non-numeric'
                         ' (e.g., bool or str).'.format(endog.shape))

    kwargs.update({'missing': missing, 'window': window})
    if weights is not None:
        kwargs['weights'] = weights
    mod = cls(endog, exog, *args, **kwargs)
    mod.formula = formula
    # since we got a dataframe, attach the original
    mod.data.frame = data
    return mod
def handle_formula_data(Y, X, formula, depth=0, missing='drop'):
    """
    Build the design matrices for a formula and report missing-data details.

    Parameters
    ----------
    Y : array_like
        Either endog (the LHS) of a model specification or all of the data.
        Y must define __getitem__ for now.
    X : array_like
        Either exog or None. If all the data for the formula is provided in
        Y then you must explicitly set X to None.
    formula : str or patsy.model_desc
        You can pass a handler by importing formula_handler and adding a
        key-value pair where the key is the formula object class and
        the value is a function that returns endog, exog, formula object.
    depth : int, optional
        Passed to ``dmatrices`` as its evaluation-environment argument.
        Default is 0.
    missing : str, optional
        Passed to ``NAAction`` as ``on_NA``. Default is 'drop'.

    Returns
    -------
    result : tuple
        The value returned by ``dmatrices`` with
        ``return_type='dataframe'``.
    missing_mask : array_like or None
        The ``missing_mask`` attribute of the NA action, or None when the
        attribute is absent or contains no true entries.
    design_info : object or None
        ``result[1].design_info`` when ``result`` has more than one
        element, otherwise None.

    Notes
    -----
    When ``formula`` is an instance of a type registered in
    ``formula_handler``, the registry entry for ``type(formula)`` is
    returned instead of the tuple described above.
    """
    # half-hearted attempt to handle other formula objects
    # NOTE(review): the registered handler is returned, not called, and the
    # lookup uses the exact type, so an instance of a subclass of a
    # registered type raises KeyError. Confirm whether this is intended.
    if isinstance(formula, tuple(iterkeys(formula_handler))):
        return formula_handler[type(formula)]

    na_action = NAAction(on_NA=missing)

    # The design matrices are built the same way whether or not the inputs
    # are pandas objects; only the data argument depends on X.
    data = Y if X is None else (Y, X)
    # NOTE: call dmatrices from this frame -- ``depth`` is a stack depth
    # that patsy counts from the caller of dmatrices.
    result = dmatrices(formula, data, depth, return_type='dataframe',
                       NA_action=na_action)

    # if missing == 'raise' there's no missing_mask
    missing_mask = getattr(na_action, 'missing_mask', None)
    if not np.any(missing_mask):
        missing_mask = None

    if len(result) > 1:  # have RHS design
        design_info = result[1].design_info  # detach it from DataFrame
    else:
        design_info = None
    # NOTE: is there ever a case where we'd need LHS design_info?
    return result, missing_mask, design_info