Exemple #1
0
    def summary(self,
                yname=None,
                xname=None,
                zname=None,
                title=None,
                alpha=.05):
        """Build a printable summary of the fitted Heckman model.

        Parameters
        ----------
        yname : string, optional
            Name of the dependent variable; forwarded unchanged to the
            table builders.
        xname : list of strings, optional
            Names of the regressors in the regression (response) equation;
            forwarded unchanged to the table builders.
        zname : string or list of strings, optional
            Names of the regressors in the selection equation. When None,
            ``self.model.exog_select_names`` is used if it is not None;
            otherwise generic names ``z0_or_zconst, z1, z2, ...`` are
            generated, one per entry in the first row of
            ``self.model.exog_select``. A single string is wrapped in a list.
        title : string, optional
            Title for the top table. When None, the title is the model class
            name followed by "Regression Results".
        alpha : float
            Significance level for the confidence intervals.

        Returns
        -------
        smry : Summary instance
            Holds, in order: the header table, the regression equation
            estimates, the selection equation estimates, the inverse Mills
            ratio estimate, a rho/sigma table, and explanatory text.

        Raises
        ------
        ValueError
            If ``self.method`` is neither ``'twostep'`` nor ``'mle'``.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary
            results
        """
        # Selection-equation labels: the caller's choice wins, then the names
        # detected from the data, and finally generic placeholders.
        if zname is None:
            detected_names = self.model.exog_select_names
            if detected_names is not None:
                zname = detected_names
            else:
                try:
                    n_select_cols = len(self.model.exog_select[0])
                except TypeError:
                    # The first row has no length, so fall back to one name.
                    zname = 'z0_or_zconst'
                else:
                    zname = ['z' + str(col) for col in range(n_select_cols)]
                    zname[0] = 'z0_or_zconst'

        # The table builder is given a list of names, so wrap a lone string.
        if isinstance(zname, str):
            zname = [zname]

        smry = summary.Summary()

        # Human-readable label for the estimation method.
        fit_method = self.method
        if fit_method not in ('twostep', 'mle'):
            raise ValueError("Invalid method set")
        if fit_method == 'twostep':
            methodstr = 'Heckman Two-Step'
        else:
            methodstr = 'Heckman MLE'

        # Header table. Entries whose value is None are left for the
        # Summary object to fill in.
        model = self.model
        header_rows = [
            ('Dep. Variable:', None),
            ('Model:', None),
            ('Method:', [methodstr]),
            ('Date:', None),
            ('Time:', None),
            ('No. Total Obs.:', ["%#i" % model.nobs_total]),
            ('No. Censored Obs.:', ["%#i" % model.nobs_censored]),
            ('No. Uncensored Obs.:', ["%#i" % model.nobs_uncensored]),
        ]
        if hasattr(self, 'cov_type'):
            header_rows.append(('Covariance Type:', [self.cov_type]))

        if title is None:
            title = ' '.join(
                [self.model.__class__.__name__, "Regression Results"])

        smry.add_table_2cols(self,
                             gleft=header_rows,
                             gright=[],
                             yname=yname,
                             xname=xname,
                             title=title)

        # Table 1: Heckman-corrected regression (response) equation.
        smry.add_table_params(self,
                              yname=yname,
                              xname=xname,
                              alpha=alpha,
                              use_t=self.use_t)

        # Table 2: selection equation.
        selection_results = self.select_res
        smry.add_table_params(selection_results,
                              yname=yname,
                              xname=zname,
                              alpha=alpha,
                              use_t=selection_results.use_t)

        # Table 3: inverse Mills ratio coefficient, wrapped in a minimal
        # results object so it can be rendered like the other tables
        # (z-score based, hence use_t=False).
        imr_results = base.LikelihoodModelResults(
            None,
            np.atleast_1d(self.params_inverse_mills),
            normalized_cov_params=np.atleast_1d(self.stderr_inverse_mills**2),
            scale=1.)
        smry.add_table_params(imr_results,
                              yname=None,
                              xname=['IMR (Lambda)'],
                              alpha=alpha,
                              use_t=False)

        # Point estimates for rho and sigma.
        diagnostic_rows = [
            ('rho:', ["%#6.3f" % self.corr_eqnerrors]),
            ('sigma:', ["%#6.3f" % np.sqrt(self.var_reg_error)]),
        ]
        smry.add_table_2cols(self,
                             gleft=diagnostic_rows,
                             gright=[],
                             yname=yname,
                             xname=xname,
                             title="")

        # Closing notes explaining which table is which.
        footnotes = [
            'First table are the estimates for the regression (response) equation.',
            'Second table are the estimates for the selection equation.',
            "Third table is the estimate for the coef of the inverse Mills ratio (Heckman's Lambda).",
        ]
        smry.add_extra_txt(footnotes)

        return smry
Exemple #2
0
    def _fit_mle(self,
                 start_params_mle=None,
                 method_mle=None,
                 maxiter_mle=None,
                 **kwargs_mle):
        """Fit the Heckman selection model by maximum likelihood.

        The optimizer works on the parameter vector
        ``[regression coefs, selection coefs, log(sigma), atanh(rho)]``;
        sigma and rho are recovered afterwards with ``exp`` and ``tanh``.

        Parameters
        ----------
        start_params_mle : array-like, optional
            Starting values in the layout described above. When None, they
            are derived from a two-step fit (``self._fit_twostep()``).
        method_mle : optional
            Forwarded to the superclass ``fit`` as ``method``.
        maxiter_mle : optional
            Forwarded to the superclass ``fit`` as ``maxiter``.
        **kwargs_mle
            Additional keyword arguments forwarded to the superclass ``fit``.

        Returns
        -------
        results : HeckmanResults
            Results object with ``method='mle'``, holding the regression
            equation estimates, the selection equation results, the inverse
            Mills ratio coefficient (lambda = rho * sigma) with its
            delta-method standard error, sigma**2 and rho.
        """
        # get number of X parameters and number of Z parameters
        Y, X, Z = self.get_datamats()
        num_xvars = X.shape[1]
        num_zvars = Z.shape[1]

        # let the Heckman two-step parameter estimates be the starting values
        # of the optimizer of the Heckman MLE estimate if not specified by user
        if start_params_mle is None:
            twostep_res = self._fit_twostep()

            xparams = np.asarray(twostep_res.params)
            zparams = np.asarray(twostep_res.select_res.params)
            params_all = np.append(xparams, zparams)
            # log(sigma)
            params_all = np.append(params_all,
                                   np.log(np.sqrt(twostep_res.var_reg_error)))
            # atanh(rho)
            # NOTE(review): this is NaN/inf if the two-step rho estimate lies
            # outside (-1, 1) -- confirm whether _fit_twostep guarantees that.
            params_all = np.append(params_all, (1. / 2.) * np.log(
                (1 + twostep_res.corr_eqnerrors) /
                (1 - twostep_res.corr_eqnerrors)))

            start_params_mle = params_all

        # fit Heckman parameters by MLE
        results_mle = super(Heckman, self).fit(start_params=start_params_mle,
                                               method=method_mle,
                                               maxiter=maxiter_mle,
                                               **kwargs_mle)

        xbeta_hat = np.asarray(results_mle.params[:num_xvars])  # reg eqn coefs
        zbeta_hat = np.asarray(
            results_mle.params[num_xvars:num_xvars +
                               num_zvars])  # selection eqn coefs

        log_sigma_hat = results_mle.params[-2]
        atanh_rho_hat = results_mle.params[-1]

        # undo the transformations used to keep sigma > 0 and |rho| < 1
        sigma_hat = np.exp(log_sigma_hat)
        rho_hat = np.tanh(atanh_rho_hat)

        scale = results_mle.scale

        xz_end = num_xvars + num_zvars
        xbeta_ncov_hat = results_mle.normalized_cov_params[:num_xvars,
                                                           :num_xvars]
        zbeta_ncov_hat = results_mle.normalized_cov_params[num_xvars:xz_end,
                                                           num_xvars:xz_end]

        imr_hat = rho_hat * sigma_hat

        # Use the Delta method to compute the standard error of lambda (the
        # inverse Mills ratio coefficient).  The covariance block below is
        # for the *estimated* parameters (log_sigma, atanh_rho), so the
        # gradient must be taken with respect to those same parameters:
        #   lambda           = exp(log_sigma) * tanh(atanh_rho)
        #   d/d(log_sigma)   = sigma * rho
        #   d/d(atanh_rho)   = sigma * (1 - rho**2)
        grad_lambda_hat = np.atleast_2d([
            sigma_hat * rho_hat,
            sigma_hat * (1. - rho_hat**2),
        ])
        covmat = results_mle.normalized_cov_params[-2:, -2:] * scale

        imr_stderr_hat = np.sqrt(
            grad_lambda_hat.dot(covmat).dot(grad_lambda_hat.T)[0, 0])

        # fill in results for this fit, and return
        results = HeckmanResults(self,
                                 xbeta_hat,
                                 xbeta_ncov_hat,
                                 scale,
                                 select_res=base.LikelihoodModelResults(
                                     None, zbeta_hat, zbeta_ncov_hat, scale),
                                 params_inverse_mills=imr_hat,
                                 stderr_inverse_mills=imr_stderr_hat,
                                 var_reg_error=sigma_hat**2,
                                 corr_eqnerrors=rho_hat,
                                 method='mle')

        return results