Example #1
0
    def from_formula(
        cls, formula, data, window, weights=None, subset=None, *args, **kwargs
    ):
        """
        Create a model instance from a formula and a data frame.

        Parameters
        ----------
        formula : object
            Formula specification handed unchanged to ``patsy.dmatrices``.
        data : object
            Data the formula is evaluated against. Must support ``.loc``
            indexing when `subset` is given.
        window : object
            Forwarded to the constructor as the ``window`` keyword.
        weights : object, optional
            Forwarded to the constructor as ``weights`` only when not None.
        subset : object, optional
            When given, only ``data.loc[subset]`` is used.
        *args
            Extra positional arguments for the constructor.
        **kwargs
            Extra keyword arguments for the constructor. ``eval_env`` is
            consumed here: None becomes depth 2, -1 becomes an empty
            ``EvalEnvironment`` and anything else is incremented by one.
            ``missing`` defaults to "skip" when absent.

        Returns
        -------
        model
            ``cls(endog, exog, *args, **kwargs)`` with ``formula`` and
            ``data.frame`` attached.

        Raises
        ------
        ValueError
            If the left-hand side evaluates to more than one column.
        """
        frame = data if subset is None else data.loc[subset]

        env = kwargs.pop("eval_env", None)
        if env is None:
            env = 2
        elif env == -1:
            from patsy import EvalEnvironment

            env = EvalEnvironment({})
        else:
            # one more frame sits between the caller and dmatrices
            env += 1

        missing = kwargs.get("missing", "skip")

        from patsy import NAAction, dmatrices

        # Keep the dmatrices call in this frame: an integer eval_env is a
        # stack depth, so moving the call into a helper would shift it.
        # NA_types is empty, so patsy is given no NA types to act on here.
        endog, exog = dmatrices(
            formula,
            frame,
            env,
            return_type="dataframe",
            NA_action=NAAction(on_NA="raise", NA_types=[]),
        )

        n_dim = endog.ndim
        if n_dim > 2 or (n_dim > 1 and endog.shape[1] > 1):
            template = (
                "endog has evaluated to an array with multiple "
                "columns that has shape {0}. This occurs when "
                "the variable converted to endog is non-numeric"
                " (e.g., bool or str)."
            )
            raise ValueError(template.format(endog.shape))

        kwargs["missing"] = missing
        kwargs["window"] = window
        if weights is not None:
            kwargs["weights"] = weights

        model = cls(endog, exog, *args, **kwargs)
        model.formula = formula
        # the design matrices are dataframes, so keep the source frame too
        model.data.frame = frame
        return model
Example #2
0
    def from_formula(cls, formula, data, window, weights=None, subset=None,
                     *args, **kwargs):
        """
        Construct the model from a formula evaluated against `data`.

        Parameters
        ----------
        formula : object
            Passed as-is to ``patsy.dmatrices``.
        data : object
            Source data; indexed with ``data.loc[subset]`` when `subset`
            is not None.
        window : object
            Passed to the constructor under the ``window`` keyword.
        weights : object, optional
            Passed to the constructor under ``weights`` unless it is None.
        subset : object, optional
            Row selector applied through ``.loc`` before building matrices.
        *args
            Additional positional constructor arguments.
        **kwargs
            Additional keyword constructor arguments. The ``eval_env`` key
            is removed and translated (None -> 2, -1 -> empty
            ``EvalEnvironment``, otherwise value + 1); ``missing`` falls
            back to 'skip'.

        Returns
        -------
        model
            The constructed instance, with ``formula`` and ``data.frame``
            set.

        Raises
        ------
        ValueError
            If endog has more than one column or more than two dimensions.
        """
        if subset is not None:
            data = data.loc[subset]

        depth = kwargs.pop('eval_env', None)
        if depth is None:
            depth = 2
        elif depth == -1:
            from patsy import EvalEnvironment
            depth = EvalEnvironment({})
        else:
            depth += 1  # this method adds a frame below the caller

        missing = kwargs.get('missing', 'skip')

        from patsy import dmatrices, NAAction
        # dmatrices has to be invoked from this very frame because `depth`
        # may be a stack offset; do not wrap the call in a helper.
        matrices = dmatrices(formula, data, depth,
                             return_type='dataframe',
                             NA_action=NAAction(on_NA='raise', NA_types=[]))
        endog, exog = matrices

        multi_column = endog.ndim > 1 and endog.shape[1] > 1
        if multi_column or endog.ndim > 2:
            message = ('endog has evaluated to an array with multiple '
                       'columns that has shape {0}. This occurs when '
                       'the variable converted to endog is non-numeric'
                       ' (e.g., bool or str).')
            raise ValueError(message.format(endog.shape))

        kwargs.update(missing=missing, window=window)
        if weights is not None:
            kwargs.update(weights=weights)

        instance = cls(endog, exog, *args, **kwargs)
        instance.formula = formula
        # keep a reference to the (possibly subsetted) original frame
        instance.data.frame = data
        return instance
Example #3
0
def handle_formula_data(Y, X, formula, depth=0, missing='drop'):
    """
    Build the design matrices for a formula from the supplied data.

    Parameters
    ----------
    Y : array_like
        Either endog (the LHS) of a model specification or all of the data.
        Y must define __getitem__ for now.
    X : array_like
        Either exog or None. If all the data for the formula is provided in
        Y then you must explicitly set X to None.
    formula : str or patsy.model_desc
        You can pass a handler by import formula_handler and adding a
        key-value pair where the key is the formula object class and
        the value is a function that returns endog, exog, formula object.
    depth : int, optional
        Forwarded to ``dmatrices`` as its third positional argument (the
        evaluation environment / stack depth). Default is 0.
    missing : str, optional
        Forwarded to ``NAAction`` as ``on_NA``. Default is 'drop'.

    Returns
    -------
    result : tuple
        The value returned by ``dmatrices`` (requested with
        ``return_type='dataframe'``).
    missing_mask : array_like or None
        ``na_action.missing_mask`` when the NA action recorded one with at
        least one truthy entry, otherwise None.
    design_info : object or None
        ``result[1].design_info`` when `result` has more than one element,
        otherwise None.

    Notes
    -----
    If `formula` is an instance of a class registered in
    ``formula_handler``, the registered object is returned directly and
    none of the above applies.
    """
    # half-hearted attempt to handle other formula objects
    if isinstance(formula, tuple(iterkeys(formula_handler))):
        # NOTE(review): this returns the registered handler itself rather
        # than calling it, and the exact-type lookup raises KeyError for
        # subclasses of a registered class. Left unchanged because the
        # handler's call signature is not visible here -- confirm intent.
        return formula_handler[type(formula)]

    na_action = NAAction(on_NA=missing)

    # The pandas and non-pandas paths issued identical calls, so only the
    # presence of X matters: with X the data is the (Y, X) pair, otherwise
    # Y alone carries everything. The call stays in this frame because
    # `depth` is relative to the caller of dmatrices.
    data = Y if X is None else (Y, X)
    result = dmatrices(formula,
                       data,
                       depth,
                       return_type='dataframe',
                       NA_action=na_action)

    # if missing == 'raise' there's no missing_mask
    missing_mask = getattr(na_action, 'missing_mask', None)
    if missing_mask is not None and not np.any(missing_mask):
        # nothing was flagged, so report "no mask" instead of all-False
        missing_mask = None

    # detach design_info from the RHS DataFrame when there is a RHS design
    # NOTE: is there ever a case where we'd need LHS design_info?
    design_info = result[1].design_info if len(result) > 1 else None
    return result, missing_mask, design_info