コード例 #1
0
    def from_formula(formula, data, *, weights=None, weight_type='robust',
                     **weight_config):
        """
        Build an IVGMM model from an IV formula and a DataFrame

        Parameters
        ----------
        formula : str
            Patsy formula extended with the IV syntax described in the
            Notes section
        data : DataFrame
            Frame holding the variables referenced in the formula
        weights : array-like, optional
            Observation weights used in estimation
        weight_type : str
            Name of the moment condition weight function to use in the GMM
            estimation
        **weight_config
            Additional keyword arguments forwarded to the model for the
            moment condition weight function

        Returns
        -------
        model : IVGMM
            Model instance.  The formula is stored on the instance as the
            ``formula`` attribute

        Notes
        -----
        The IV formula extends a standard Patsy formula with a block of the
        form [endog ~ instruments] listing the endogenous variables and
        their instruments.  The general structure is
        `dependent ~ exog [endog ~ instruments]`, and both
        `dependent ~ exog endog` and `dependent ~ exog instruments` must be
        valid Patsy formulas.

        A constant must be explicitly included using '1 +' if required.

        Examples
        --------
        >>> import numpy as np
        >>> from linearmodels.datasets import wage
        >>> from linearmodels.iv import IVGMM
        >>> data = wage.load()
        >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + brthord + [educ ~ sibs]'
        >>> mod = IVGMM.from_formula(formula, data)
        """
        # Split the formula into the four data blocks the model expects
        dependent, exog_vars, endog_vars, instruments = parse_formula(formula,
                                                                      data)
        model = IVGMM(dependent, exog_vars, endog_vars, instruments,
                      weights=weights, weight_type=weight_type,
                      **weight_config)
        # Keep the formula text available on the returned model
        model.formula = formula
        return model
コード例 #2
0
    def from_formula(formula, data, *, weights=None, fuller=0, kappa=None):
        """
        Build an IVLIML model from an IV formula and a DataFrame

        Parameters
        ----------
        formula : str
            Patsy formula extended with the IV syntax described in the
            Notes section
        data : DataFrame
            Frame holding the variables referenced in the formula
        weights : array-like, optional
            Observation weights used in estimation
        fuller : float, optional
            Fuller's alpha to modify LIML estimator. Default returns
            unmodified LIML estimator.
        kappa : float, optional
            Parameter value for k-class estimation.  If not provided,
            computed to produce LIML parameter estimate.

        Returns
        -------
        model : IVLIML
            Model instance.  The formula is stored on the instance as the
            ``formula`` attribute

        Notes
        -----
        The IV formula extends a standard Patsy formula with a block of the
        form [endog ~ instruments] listing the endogenous variables and
        their instruments.  The general structure is
        `dependent ~ exog [endog ~ instruments]`, and both
        `dependent ~ exog endog` and `dependent ~ exog instruments` must be
        valid Patsy formulas.

        A constant must be explicitly included using '1 +' if required.

        Examples
        --------
        >>> import numpy as np
        >>> from linearmodels.datasets import wage
        >>> from linearmodels.iv import IVLIML
        >>> data = wage.load()
        >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + brthord + [educ ~ sibs]'
        >>> mod = IVLIML.from_formula(formula, data)
        """
        # Split the formula into the four data blocks the model expects
        dependent, exog_vars, endog_vars, instruments = parse_formula(formula,
                                                                      data)
        model = IVLIML(dependent, exog_vars, endog_vars, instruments,
                       weights=weights, fuller=fuller, kappa=kappa)
        # Keep the formula text available on the returned model
        model.formula = formula
        return model
コード例 #3
0
    def from_formula(cls, formula, data, *, sigma=None, weights=None):
        """
        Specify a 3SLS using the formula interface

        Parameters
        ----------
        formula : {str, dict-like}
            Either a string or a dictionary of strings where each value in
            the dictionary represents a single equation. See Notes for a
            description of the accepted syntax
        data : DataFrame
            Frame containing named variables
        sigma : array-like
            Pre-specified residual covariance to use in GLS estimation. If
            not provided, FGLS is implemented based on an estimate of sigma.
        weights : dict-like
            Dictionary like object (e.g. a DataFrame) containing variable
            weights.  Each entry must have the same number of observations as
            data.  Equations whose label is not a key in weights are passed
            to the model without a 'weights' entry, and their labels are
            handed to ``_missing_weights``

        Returns
        -------
        model : IV3SLS
            Model instance

        Raises
        ------
        TypeError
            If formula is neither a string nor a mapping

        Notes
        -----
        Models can be specified in one of two ways. The first uses curly
        braces to encapsulate equations.  The second uses a dictionary
        where each key is an equation name.

        When curly braces are used, an equation may start with a label
        followed by a colon.  Only the first colon separates the label from
        the equation, so ':' may still appear in the equation itself.
        Unlabelled equations are keyed by the text to the left of the first
        '~'.  A key that is already in use is made unique by appending '.0',
        '.1', and so on.

        Examples
        --------
        The simplest format uses standard Patsy formulas for each equation
        in a dictionary.  Best practice is to use an Ordered Dictionary

        >>> import pandas as pd
        >>> import numpy as np
        >>> cols = ['y1', 'x1_1', 'x1_2', 'z1', 'y2', 'x2_1', 'x2_2', 'z2']
        >>> data = pd.DataFrame(np.random.randn(500, 8), columns=cols)
        >>> from linearmodels.system import IV3SLS
        >>> formula = {'eq1': 'y1 ~ 1 + x1_1 + [x1_2 ~ z1]',
        ...            'eq2': 'y2 ~ 1 + x2_1 + [x2_2 ~ z2]'}
        >>> mod = IV3SLS.from_formula(formula, data)

        The second format uses curly braces {} to surround distinct equations

        >>> formula = '{y1 ~ 1 + x1_1 + [x1_2 ~ z1]} {y2 ~ 1 + x2_1 + [x2_2 ~ z2]}'
        >>> mod = IV3SLS.from_formula(formula, data)

        It is also possible to include equation labels when using curly braces

        >>> formula = '{eq1: y1 ~ 1 + x1_1 + [x1_2 ~ z1]} {eq2: y2 ~ 1 + x2_1 + [x2_2 ~ z2]}'
        >>> mod = IV3SLS.from_formula(formula, data)
        """
        if not isinstance(formula, (Mapping, str)):
            raise TypeError('formula must be a string or dictionary-like')

        missing_weight_keys = []
        eqns = OrderedDict()

        def _add_equation(key, eqn):
            # Parse one equation and record it (and its weights) under key.
            # '0 +' is inserted after every '~', including the one inside the
            # [endog ~ instruments] block.
            # NOTE(review): presumably this suppresses Patsy's implicit
            # intercept so a constant must be requested with '1 +' - confirm.
            zero_intercept = '~ 0 +'.join(eqn.split('~'))
            dep, exog, endog, instr = parse_formula(zero_intercept, data)
            eqns[key] = {
                'dependent': dep,
                'exog': exog,
                'endog': endog,
                'instruments': instr
            }
            if weights is not None:
                if key in weights:
                    eqns[key]['weights'] = weights[key]
                else:
                    missing_weight_keys.append(key)

        if isinstance(formula, Mapping):
            # Dictionary form: keys are used as the equation labels as-is
            for key, eqn in formula.items():
                _add_equation(key, eqn)
        else:
            # String form: equations are delimited by curly braces
            flattened = formula.replace('\n', ' ').strip()
            for part in flattened.split('}'):
                part = part.strip()
                if part == '':
                    continue
                part = part.replace('{', '')
                lhs = part.split('~')[0]
                if ':' in lhs:
                    # Split on the first colon only so that ':' in the rest
                    # of the equation (e.g. a Patsy interaction a:b) does not
                    # break the label/equation unpacking
                    base_key, part = part.split(':', 1)
                    base_key = base_key.strip()
                    part = part.strip()
                else:
                    # No label: fall back to the left-hand side of the formula
                    base_key = lhs.strip()
                # Make the key unique if the same label was already used
                key = base_key
                count = 0
                while key in eqns:
                    key = base_key + '.{0}'.format(count)
                    count += 1
                _add_equation(key, part)

        _missing_weights(missing_weight_keys)

        return cls(eqns, sigma=sigma)