def from_formula(formula, data, *, weights=None, weight_type='robust', **weight_config):
    """
    Build an IVGMM model from an IV-style formula and a DataFrame

    Parameters
    ----------
    formula : str
        Patsy formula modified for the IV syntax described in the notes
        section
    data : DataFrame
        DataFrame containing the variables used in the formula
    weights : array-like, optional
        Observation weights used in estimation
    weight_type : str
        Name of moment condition weight function to use in the GMM
        estimation
    **weight_config
        Additional keyword arguments to pass to the moment condition weight
        function

    Returns
    -------
    model : IVGMM
        Model instance. The formula string is stored on the instance as
        the ``formula`` attribute.

    Notes
    -----
    The IV formula modifies the standard Patsy formula to include a
    block of the form [endog ~ instruments] which is used to indicate
    the list of endogenous variables and instruments.  The general
    structure is `dependent ~ exog [endog ~ instruments]` and it must
    be the case that the formula expressions constructed from blocks
    `dependent ~ exog endog` and `dependent ~ exog instruments` are both
    valid Patsy formulas.

    A constant must be explicitly included using '1 +' if required.

    Examples
    --------
    >>> import numpy as np
    >>> from linearmodels.datasets import wage
    >>> from linearmodels.iv import IVGMM
    >>> data = wage.load()
    >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + brthord + [educ ~ sibs]'
    >>> mod = IVGMM.from_formula(formula, data)
    """
    # parse_formula splits the IV formula into the four model components.
    dependent, exogenous, endogenous, instruments = parse_formula(formula, data)
    model = IVGMM(
        dependent,
        exogenous,
        endogenous,
        instruments,
        weights=weights,
        weight_type=weight_type,
        **weight_config
    )
    # Keep the originating formula on the model instance.
    model.formula = formula
    return model
def from_formula(formula, data, *, weights=None, fuller=0, kappa=None):
    """
    Build an IVLIML model from an IV-style formula and a DataFrame

    Parameters
    ----------
    formula : str
        Patsy formula modified for the IV syntax described in the notes
        section
    data : DataFrame
        DataFrame containing the variables used in the formula
    weights : array-like, optional
        Observation weights used in estimation
    fuller : float, optional
        Fuller's alpha to modify LIML estimator. Default returns unmodified
        LIML estimator.
    kappa : float, optional
        Parameter value for k-class estimation.  If not provided, computed to
        produce LIML parameter estimate.

    Returns
    -------
    model : IVLIML
        Model instance. The formula string is stored on the instance as
        the ``formula`` attribute.

    Notes
    -----
    The IV formula modifies the standard Patsy formula to include a
    block of the form [endog ~ instruments] which is used to indicate
    the list of endogenous variables and instruments.  The general
    structure is `dependent ~ exog [endog ~ instruments]` and it must
    be the case that the formula expressions constructed from blocks
    `dependent ~ exog endog` and `dependent ~ exog instruments` are both
    valid Patsy formulas.

    A constant must be explicitly included using '1 +' if required.

    Examples
    --------
    >>> import numpy as np
    >>> from linearmodels.datasets import wage
    >>> from linearmodels.iv import IVLIML
    >>> data = wage.load()
    >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + brthord + [educ ~ sibs]'
    >>> mod = IVLIML.from_formula(formula, data)
    """
    # parse_formula splits the IV formula into the four model components.
    dependent, exogenous, endogenous, instruments = parse_formula(formula, data)
    model = IVLIML(
        dependent,
        exogenous,
        endogenous,
        instruments,
        weights=weights,
        fuller=fuller,
        kappa=kappa
    )
    # Keep the originating formula on the model instance.
    model.formula = formula
    return model
def from_formula(cls, formula, data, *, sigma=None, weights=None):
    """
    Specify a 3SLS using the formula interface

    Parameters
    ----------
    formula : {str, dict-like}
        Either a string or a dictionary of strings where each value in
        the dictionary represents a single equation. See Notes for a
        description of the accepted syntax
    data : DataFrame
        Frame containing named variables
    sigma : array-like, optional
        Pre-specified residual covariance to use in GLS estimation. If
        not provided, FGLS is implemented based on an estimate of sigma.
    weights : dict-like, optional
        Dictionary like object (e.g. a DataFrame) containing variable
        weights.  Each entry must have the same number of observations as
        data.  If an equation label is not a key in weights, no weights
        are attached to that equation and its label is reported through
        ``_missing_weights``.

    Returns
    -------
    model : IV3SLS
        Model instance

    Raises
    ------
    TypeError
        If formula is neither a string nor a Mapping.

    Notes
    -----
    Models can be specified in one of two ways. The first uses curly
    braces to encapsulate equations.  The second uses a dictionary
    where each key is an equation name.

    When curly braces are used without a label, the equation is keyed
    by the text to the left of its first '~'.  If that key is already
    in use, a numeric suffix ('.0', '.1', ...) is appended until the
    key is unique.

    '0 +' is inserted after every '~' before parsing, so a constant is
    only included when it is requested explicitly with '1 +'.

    Examples
    --------
    The simplest format uses standard Patsy formulas for each equation
    in a dictionary.  Best practice is to use an Ordered Dictionary

    >>> import pandas as pd
    >>> import numpy as np
    >>> cols = ['y1', 'x1_1', 'x1_2', 'z1', 'y2', 'x2_1', 'x2_2', 'z2']
    >>> data = pd.DataFrame(np.random.randn(500, 8), columns=cols)
    >>> from linearmodels.system import IV3SLS
    >>> formula = {'eq1': 'y1 ~ 1 + x1_1 + [x1_2 ~ z1]',
    ...            'eq2': 'y2 ~ 1 + x2_1 + [x2_2 ~ z2]'}
    >>> mod = IV3SLS.from_formula(formula, data)

    The second format uses curly braces {} to surround distinct equations

    >>> formula = '{y1 ~ 1 + x1_1 + [x1_2 ~ z1]} {y2 ~ 1 + x2_1 + [x2_2 ~ z2]}'
    >>> mod = IV3SLS.from_formula(formula, data)

    It is also possible to include equation labels when using curly braces

    >>> formula = '{eq1: y1 ~ 1 + x1_1 + [x1_2 ~ z1]} {eq2: y2 ~ 1 + x2_1 + [x2_2 ~ z2]}'
    >>> mod = IV3SLS.from_formula(formula, data)
    """
    if not isinstance(formula, (Mapping, str)):
        raise TypeError('formula must be a string or dictionary-like')

    missing_weight_keys = []
    eqns = OrderedDict()

    def _add_equation(key, eq_formula):
        # Parse one equation and store its components (and weights) under key.
        # '0 +' after each '~' stops an implicit constant from being added.
        no_const = '~ 0 +'.join(eq_formula.split('~'))
        dep, exog, endog, instr = parse_formula(no_const, data)
        eqns[key] = {
            'dependent': dep,
            'exog': exog,
            'endog': endog,
            'instruments': instr
        }
        if weights is not None:
            if key in weights:
                eqns[key]['weights'] = weights[key]
            else:
                missing_weight_keys.append(key)

    if isinstance(formula, Mapping):
        for key in formula:
            _add_equation(key, formula[key])
    else:
        # Equations are delimited by a closing brace; newlines are treated
        # as plain whitespace.
        for part in formula.replace('\n', ' ').strip().split('}'):
            part = part.strip()
            if part == '':
                continue
            part = part.replace('{', '')
            if ':' in part.split('~')[0]:
                # A colon left of the first '~' marks an equation label.
                # Split only once so that later colons (e.g. Patsy
                # interaction terms such as x1:x2) stay in the equation.
                base_key, part = part.split(':', 1)
                base_key = base_key.strip()
                part = part.strip()
            else:
                # Unlabelled: key the equation by its left-hand side.
                base_key = part.split('~')[0].strip()
            key = base_key
            count = 0
            while key in eqns:
                key = base_key + '.{0}'.format(count)
                count += 1
            _add_equation(key, part)

    _missing_weights(missing_weight_keys)
    return cls(eqns, sigma=sigma)