Ejemplo n.º 1
0
    def pdf(self, endog_predict=None, exog_predict=None):
        r"""
        Evaluate the conditional probability density function.

        Parameters
        ----------
        endog_predict : array_like, optional
            Evaluation data for the dependent variables.  If unspecified, the
            training data (``self.endog``) is used.
        exog_predict : array_like, optional
            Evaluation data for the independent variables.  If unspecified,
            the training data (``self.exog``) is used.

        Returns
        -------
        pdf : array_like
            The value of the probability density at `endog_predict` and
            `exog_predict`, one entry per evaluation row, passed through
            ``np.squeeze``.

        Notes
        -----
        The conditional density is the ratio of the joint estimate (dependent
        and independent variables stacked column-wise) to the estimate for
        the independent variables alone:

        .. math:: f(y|x)=\frac{f(y,x)}{f(x)}

        with each estimate built from the generalized product kernel

        .. math:: f(X)=\prod_{s=1}^{q}h_{s}^{-1}k
                            \left(\frac{X_{is}-X_{js}}{h_{s}}\right)

        where :math:`k` is the appropriate kernel for each variable.
        """
        endog_eval = (self.endog if endog_predict is None
                      else _adjust_shape(endog_predict, self.k_dep))
        exog_eval = (self.exog if exog_predict is None
                     else _adjust_shape(exog_predict, self.k_indep))

        # Joint evaluation points: dependent columns first, then independent.
        joint_eval = np.column_stack((endog_eval, exog_eval))
        n_eval = np.shape(joint_eval)[0]

        # Numerator uses the full bandwidth vector on the joint data; the
        # denominator uses only the trailing (independent-variable) part.
        ratios = [
            gpke(self.bw, data=self.data,
                 data_predict=joint_eval[row, :],
                 var_type=(self.dep_type + self.indep_type))
            / gpke(self.bw[self.k_dep:], data=self.exog,
                   data_predict=exog_eval[row, :],
                   var_type=self.indep_type)
            for row in xrange(n_eval)
        ]

        return np.squeeze(ratios)
Ejemplo n.º 2
0
    def pdf(self, endog_predict=None, exog_predict=None):
        r"""
        Evaluate the conditional probability density function.

        Parameters
        ----------
        endog_predict : array_like, optional
            Evaluation data for the dependent variables.  If unspecified, the
            training data (``self.endog``) is used.
        exog_predict : array_like, optional
            Evaluation data for the independent variables.  If unspecified,
            the training data (``self.exog``) is used.

        Returns
        -------
        pdf : array_like
            The value of the probability density at `endog_predict` and
            `exog_predict`, one entry per evaluation row, passed through
            ``np.squeeze``.

        Notes
        -----
        The conditional density is the ratio of the joint estimate (dependent
        and independent variables stacked column-wise) to the estimate for
        the independent variables alone:

        .. math:: f(y|x)=\frac{f(y,x)}{f(x)}

        with each estimate built from the generalized product kernel

        .. math:: f(X)=\prod_{s=1}^{q}h_{s}^{-1}k
                            \left(\frac{X_{is}-X_{js}}{h_{s}}\right)

        where :math:`k` is the appropriate kernel for each variable.
        """
        endog_eval = (self.endog if endog_predict is None
                      else _adjust_shape(endog_predict, self.k_dep))
        exog_eval = (self.exog if exog_predict is None
                     else _adjust_shape(exog_predict, self.k_indep))

        # Joint evaluation points: dependent columns first, then independent.
        joint_eval = np.column_stack((endog_eval, exog_eval))
        n_eval = np.shape(joint_eval)[0]

        # Numerator uses the full bandwidth vector on the joint data; the
        # denominator uses only the trailing (independent-variable) part.
        ratios = [
            gpke(self.bw, data=self.data,
                 data_predict=joint_eval[row, :],
                 var_type=(self.dep_type + self.indep_type))
            / gpke(self.bw[self.k_dep:], data=self.exog,
                   data_predict=exog_eval[row, :],
                   var_type=self.indep_type)
            for row in xrange(n_eval)
        ]

        return np.squeeze(ratios)
Ejemplo n.º 3
0
    def _compute_lambda(self, Y, X):
        """
        Compute lambda, the main part of the test statistic.

        A ``KernelReg`` model is built from `Y` and `X` with the stored
        variable types, the regression type of ``self.model`` and the stored
        bandwidth (``efficient=False``).  The second element returned by its
        ``fit()`` is restricted to the columns in ``self.test_vars``, squared,
        summed over all entries and divided by the number of rows of `X`.

        Parameters
        ----------
        Y : array_like
            Dependent variable; adjusted with ``_adjust_shape(Y, 1)``.
        X : array_like
            Regressors; adjusted with ``_adjust_shape(X, self.k_vars)``.

        Returns
        -------
        lam : float
            Sum of squares of the selected columns divided by the row count.
        """
        # Row count is read from X as passed in, before any shape adjustment.
        n_rows = np.shape(X)[0]
        endog = _adjust_shape(Y, 1)
        exog = _adjust_shape(X, self.k_vars)

        fitted = KernelReg(endog, exog, self.var_type, self.model.reg_type,
                           self.bw,
                           defaults=EstimatorSettings(efficient=False)).fit()
        effects = np.reshape(fitted[1][:, self.test_vars],
                             (n_rows, len(self.test_vars)))

        # The statistic is not pivoted: the scale is fixed at 1 instead of
        # the standard deviation of `effects`.
        # NOTE(review): the earlier comment here was ambiguous about whether
        # pivoting helps the bootstrap -- confirm the intended choice.
        scale = 1.
        return ((effects / scale) ** 2).sum() / float(n_rows)
Ejemplo n.º 4
0
    def pdf(self, data_predict=None):
        r"""
        Evaluate the probability density function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at.  If unspecified, the training data
            (``self.data``) is used.

        Returns
        -------
        pdf_est : array_like
            Probability density function evaluated at `data_predict`, one
            value per evaluation row, passed through ``np.squeeze``.

        Notes
        -----
        The probability density is given by the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j}) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)

        The kernel result for each point is divided by ``self.nobs``.
        """
        eval_points = (self.data if data_predict is None
                       else _adjust_shape(data_predict, self.k_vars))

        n_eval = np.shape(eval_points)[0]
        densities = [
            gpke(self.bw, data=self.data,
                 data_predict=eval_points[row, :],
                 var_type=self.var_type) / self.nobs
            for row in xrange(n_eval)
        ]
        return np.squeeze(densities)
Ejemplo n.º 5
0
    def fit(self, data_predict=None):
        """
        Return the mean and marginal effects at the `data_predict` points.

        Parameters
        ----------
        data_predict : array_like, optional
            Points at which to return the mean and marginal effects.  If not
            given, ``data_predict == exog``.

        Returns
        -------
        mean : ndarray
            The regression result for the mean (i.e. the actual curve), one
            value per evaluation point.
        mfx : ndarray
            The marginal effects, i.e. the partial derivatives of the mean,
            with shape ``(number of points, k_vars)``.

        """
        # The estimator is selected by the stored regression type.
        estimator = self.est[self.reg_type]
        points = (self.exog if data_predict is None
                  else _adjust_shape(data_predict, self.k_vars))

        n_points = np.shape(points)[0]
        mean = np.empty((n_points,))
        mfx = np.empty((n_points, self.k_vars))
        for row in xrange(n_points):
            # Each call yields (mean, marginal effects) for a single point.
            result = estimator(self.bw, self.endog, self.exog,
                               data_predict=points[row, :])
            mean[row] = result[0]
            mfx[row, :] = np.squeeze(result[1])

        return mean, mfx
Ejemplo n.º 6
0
    def pdf(self, data_predict=None):
        r"""
        Evaluate the probability density function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at.  If unspecified, the training data
            (``self.data``) is used.

        Returns
        -------
        pdf_est : array_like
            Probability density function evaluated at `data_predict`, one
            value per evaluation row, passed through ``np.squeeze``.

        Notes
        -----
        The probability density is given by the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j}) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)

        The kernel result for each point is divided by ``self.nobs``.
        """
        eval_points = (self.data if data_predict is None
                       else _adjust_shape(data_predict, self.k_vars))

        n_eval = np.shape(eval_points)[0]
        densities = [
            gpke(self.bw, data=self.data,
                 data_predict=eval_points[row, :],
                 var_type=self.var_type) / self.nobs
            for row in xrange(n_eval)
        ]
        return np.squeeze(densities)
Ejemplo n.º 7
0
 def __init__(self, endog, exog, var_type, reg_type='ll', bw='cv_ls',
              defaults=None):
     """
     Store the data, register the estimators and compute the bandwidth.

     Parameters
     ----------
     endog : array_like
         Dependent variable; normalised with ``_adjust_shape(endog, 1)``.
     exog : array_like
         Regressors; normalised with ``_adjust_shape(exog, k_vars)``.
     var_type : sequence
         Variable types, one entry per regressor; its length defines
         ``k_vars``.
     reg_type : str, optional
         Regression type, stored as ``self.reg_type``.  The ``self.est``
         mapping built here has the keys ``'lc'`` (``_est_loc_constant``)
         and ``'ll'`` (``_est_loc_linear``).  Default is ``'ll'``.
     bw : optional
         Bandwidth specification, forwarded unchanged to
         ``_compute_reg_bw`` or ``_compute_efficient``.  Default is
         ``'cv_ls'``.
     defaults : EstimatorSettings, optional
         Settings passed to ``_set_defaults``.  If omitted, a new
         ``EstimatorSettings()`` is created for this instance.
     """
     # Build the default settings per call instead of sharing one object
     # created when the function was defined.
     if defaults is None:
         defaults = EstimatorSettings()
     self.var_type = var_type
     self.data_type = var_type
     self.reg_type = reg_type
     self.k_vars = len(self.var_type)
     self.endog = _adjust_shape(endog, 1)
     self.exog = _adjust_shape(exog, self.k_vars)
     self.data = np.column_stack((self.endog, self.exog))
     self.nobs = np.shape(self.exog)[0]
     # Lookup tables: bandwidth selection criteria and regression estimators.
     self.bw_func = dict(cv_ls=self.cv_loo, aic=self.aic_hurvich)
     self.est = dict(lc=self._est_loc_constant, ll=self._est_loc_linear)
     self._set_defaults(defaults)
     # ``self.efficient`` is not assigned in this method; presumably it is
     # set by ``_set_defaults`` -- TODO confirm.
     if not self.efficient:
         self.bw = self._compute_reg_bw(bw)
     else:
         self.bw = self._compute_efficient(bw)
Ejemplo n.º 8
0
 def __init__(self, endog, exog, dep_type, indep_type, bw,
              defaults=None):
     """
     Store the dependent/independent data and compute the bandwidth.

     Parameters
     ----------
     endog : array_like
         Dependent variables; normalised with
         ``_adjust_shape(endog, len(dep_type))``.
     exog : array_like
         Independent variables; normalised with
         ``_adjust_shape(exog, len(indep_type))``.
     dep_type : sequence
         Variable types of the dependent variables, one entry each.
     indep_type : sequence
         Variable types of the independent variables, one entry each.
     bw
         Bandwidth specification, forwarded unchanged to ``_compute_bw``
         or ``_compute_efficient``.
     defaults : EstimatorSettings, optional
         Settings passed to ``_set_defaults``.  If omitted, a new
         ``EstimatorSettings()`` is created for this instance.
     """
     # Build the default settings per call instead of sharing one object
     # created when the function was defined.
     if defaults is None:
         defaults = EstimatorSettings()
     self.dep_type = dep_type
     self.indep_type = indep_type
     self.data_type = dep_type + indep_type
     self.k_dep = len(self.dep_type)
     self.k_indep = len(self.indep_type)
     self.endog = _adjust_shape(endog, self.k_dep)
     self.exog = _adjust_shape(exog, self.k_indep)
     # ``k_dep`` is re-read from the adjusted array's second dimension.
     self.nobs, self.k_dep = np.shape(self.endog)
     # Joint data: dependent columns first, then independent columns.
     self.data = np.column_stack((self.endog, self.exog))
     self.k_vars = np.shape(self.data)[1]
     self._set_defaults(defaults)
     # ``self.efficient`` is not assigned in this method; presumably it is
     # set by ``_set_defaults`` -- TODO confirm.
     if not self.efficient:
         self.bw = self._compute_bw(bw)
     else:
         self.bw = self._compute_efficient(bw)
Ejemplo n.º 9
0
 def __init__(self, endog, exog, dep_type, indep_type, bw,
              defaults=None):
     """
     Store the dependent/independent data and compute the bandwidth.

     Parameters
     ----------
     endog : array_like
         Dependent variables; normalised with
         ``_adjust_shape(endog, len(dep_type))``.
     exog : array_like
         Independent variables; normalised with
         ``_adjust_shape(exog, len(indep_type))``.
     dep_type : sequence
         Variable types of the dependent variables, one entry each.
     indep_type : sequence
         Variable types of the independent variables, one entry each.
     bw
         Bandwidth specification, forwarded unchanged to ``_compute_bw``
         or ``_compute_efficient``.
     defaults : EstimatorSettings, optional
         Settings passed to ``_set_defaults``.  If omitted, a new
         ``EstimatorSettings()`` is created for this instance.
     """
     # Build the default settings per call instead of sharing one object
     # created when the function was defined.
     if defaults is None:
         defaults = EstimatorSettings()
     self.dep_type = dep_type
     self.indep_type = indep_type
     self.data_type = dep_type + indep_type
     self.k_dep = len(self.dep_type)
     self.k_indep = len(self.indep_type)
     self.endog = _adjust_shape(endog, self.k_dep)
     self.exog = _adjust_shape(exog, self.k_indep)
     # ``k_dep`` is re-read from the adjusted array's second dimension.
     self.nobs, self.k_dep = np.shape(self.endog)
     # Joint data: dependent columns first, then independent columns.
     self.data = np.column_stack((self.endog, self.exog))
     self.k_vars = np.shape(self.data)[1]
     self._set_defaults(defaults)
     # ``self.efficient`` is not assigned in this method; presumably it is
     # set by ``_set_defaults`` -- TODO confirm.
     if not self.efficient:
         self.bw = self._compute_bw(bw)
     else:
         self.bw = self._compute_efficient(bw)
Ejemplo n.º 10
0
    def cdf(self, data_predict=None):
        r"""
        Evaluate the cumulative distribution function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at.  If unspecified, the training data
            (``self.data``) is used.

        Returns
        -------
        cdf_est : array_like
            The estimate of the cdf, one value per evaluation row, passed
            through ``np.squeeze``.

        Notes
        -----
        See http://en.wikipedia.org/wiki/Cumulative_distribution_function
        For more details on the estimation see Ref. [5] in module docstring.

        The multivariate CDF for mixed data (continuous and ordered/unordered
        discrete) is estimated by:

        .. math:: F(x^{c},x^{d})=n^{-1}\sum_{i=1}^{n}\left[G(
            \frac{x^{c}-X_{i}}{h})\sum_{u\leq x^{d}}L(X_{i}^{d},x_{i}^{d},
            \lambda)\right]

        where G() is the product kernel CDF estimator for the continuous
        and L() for the discrete variables.

        Used bandwidth is ``self.bw``.
        """
        eval_points = (self.data if data_predict is None
                       else _adjust_shape(data_predict, self.k_vars))

        n_eval = np.shape(eval_points)[0]
        # The CDF variants of the continuous, unordered and ordered kernels
        # are requested explicitly; each result is divided by ``self.nobs``.
        estimates = [
            gpke(self.bw,
                 data=self.data,
                 data_predict=eval_points[row, :],
                 var_type=self.var_type,
                 ckertype="gaussian_cdf",
                 ukertype="aitchisonaitken_cdf",
                 okertype='wangryzin_cdf') / self.nobs
            for row in xrange(n_eval)
        ]
        return np.squeeze(estimates)
Ejemplo n.º 11
0
    def __init__(self, data, var_type, bw=None, defaults=None):
        """
        Store the training data and compute the bandwidth.

        Parameters
        ----------
        data : array_like
            Training data; normalised with
            ``_adjust_shape(data, len(var_type))``.
        var_type : sequence
            Variable types, one entry per variable.
        bw : optional
            Bandwidth specification, forwarded unchanged to ``_compute_bw``
            or ``_compute_efficient``.  Default is None.
        defaults : EstimatorSettings, optional
            Settings passed to ``_set_defaults``.  If omitted, a new
            ``EstimatorSettings()`` is created for this instance.

        Raises
        ------
        ValueError
            If the number of observations does not exceed the number of
            variables.
        """
        # Build the default settings per call instead of sharing one object
        # created when the function was defined.
        if defaults is None:
            defaults = EstimatorSettings()
        self.var_type = var_type
        self.k_vars = len(self.var_type)
        self.data = _adjust_shape(data, self.k_vars)
        self.data_type = var_type
        # ``k_vars`` is re-read from the adjusted array's second dimension.
        self.nobs, self.k_vars = np.shape(self.data)
        if self.nobs <= self.k_vars:
            raise ValueError("The number of observations must be larger "
                             "than the number of variables.")

        self._set_defaults(defaults)
        # ``self.efficient`` is not assigned in this method; presumably it
        # is set by ``_set_defaults`` -- TODO confirm.
        if not self.efficient:
            self.bw = self._compute_bw(bw)
        else:
            self.bw = self._compute_efficient(bw)
Ejemplo n.º 12
0
    def __init__(self, data, var_type, bw=None, defaults=None):
        """
        Store the training data and compute the bandwidth.

        Parameters
        ----------
        data : array_like
            Training data; normalised with
            ``_adjust_shape(data, len(var_type))``.
        var_type : sequence
            Variable types, one entry per variable.
        bw : optional
            Bandwidth specification, forwarded unchanged to ``_compute_bw``
            or ``_compute_efficient``.  Default is None.
        defaults : EstimatorSettings, optional
            Settings passed to ``_set_defaults``.  If omitted, a new
            ``EstimatorSettings()`` is created for this instance.

        Raises
        ------
        ValueError
            If the number of observations does not exceed the number of
            variables.
        """
        # Build the default settings per call instead of sharing one object
        # created when the function was defined.
        if defaults is None:
            defaults = EstimatorSettings()
        self.var_type = var_type
        self.k_vars = len(self.var_type)
        self.data = _adjust_shape(data, self.k_vars)
        self.data_type = var_type
        # ``k_vars`` is re-read from the adjusted array's second dimension.
        self.nobs, self.k_vars = np.shape(self.data)
        if self.nobs <= self.k_vars:
            raise ValueError("The number of observations must be larger "
                             "than the number of variables.")

        self._set_defaults(defaults)
        # ``self.efficient`` is not assigned in this method; presumably it
        # is set by ``_set_defaults`` -- TODO confirm.
        if not self.efficient:
            self.bw = self._compute_bw(bw)
        else:
            self.bw = self._compute_efficient(bw)
Ejemplo n.º 13
0
    def cdf(self, data_predict=None):
        r"""
        Evaluate the cumulative distribution function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at.  If unspecified, the training data
            (``self.data``) is used.

        Returns
        -------
        cdf_est : array_like
            The estimate of the cdf, one value per evaluation row, passed
            through ``np.squeeze``.

        Notes
        -----
        See http://en.wikipedia.org/wiki/Cumulative_distribution_function
        For more details on the estimation see Ref. [5] in module docstring.

        The multivariate CDF for mixed data (continuous and ordered/unordered
        discrete) is estimated by:

        .. math:: F(x^{c},x^{d})=n^{-1}\sum_{i=1}^{n}\left[G(
            \frac{x^{c}-X_{i}}{h})\sum_{u\leq x^{d}}L(X_{i}^{d},x_{i}^{d},
            \lambda)\right]

        where G() is the product kernel CDF estimator for the continuous
        and L() for the discrete variables.

        Used bandwidth is ``self.bw``.
        """
        eval_points = (self.data if data_predict is None
                       else _adjust_shape(data_predict, self.k_vars))

        n_eval = np.shape(eval_points)[0]
        # The CDF variants of the continuous, unordered and ordered kernels
        # are requested explicitly; each result is divided by ``self.nobs``.
        estimates = [
            gpke(self.bw,
                 data=self.data,
                 data_predict=eval_points[row, :],
                 var_type=self.var_type,
                 ckertype="gaussian_cdf",
                 ukertype="aitchisonaitken_cdf",
                 okertype='wangryzin_cdf') / self.nobs
            for row in xrange(n_eval)
        ]
        return np.squeeze(estimates)
Ejemplo n.º 14
0
 def censored(self, censor_val):
     """
     Sort the sample by `endog` and compute the weights ``self.W_in``.

     The method overwrites ``self.endog``, ``self.exog`` and ``self.d``
     with versions ordered by increasing `endog`, stores the sorting
     permutation (``self.sortix``) and its inverse (``self.sortix_rev``),
     and fills ``self.W_in`` (shape ``(nobs, 1)``) with, for the i-th
     sorted observation (1-based), ::

         W_i = d_i / (n - i + 1)
               * prod_{j < i} ((n - j) / (n - j + 1)) ** d_j

     where ``n = self.nobs`` and ``d`` is 1. where `endog` differs from
     `censor_val` and 0. where it equals it.

     Parameters
     ----------
     censor_val : scalar
         Value of `endog` that marks an observation as censored.
     """
     # see pp. 341-344 in [1]
     self.d = (self.endog != censor_val) * 1.
     ix = np.argsort(np.squeeze(self.endog))
     self.sortix = ix
     # Inverse permutation: position of each original row after sorting.
     self.sortix_rev = np.zeros(ix.shape, int)
     self.sortix_rev[ix] = np.arange(len(ix))
     self.endog = np.squeeze(self.endog[ix])
     self.endog = _adjust_shape(self.endog, 1)
     self.exog = np.squeeze(self.exog[ix])
     self.d = np.squeeze(self.d[ix])
     self.W_in = np.empty((self.nobs, 1))
     nobs = float(self.nobs)
     # Running product over j < i.  It is extended by one factor per
     # iteration (same factors, same order) instead of being rebuilt from
     # scratch for every i, which made the loop quadratic in nobs.
     P = 1
     for i in xrange(1, self.nobs + 1):
         if i > 1:
             j = i - 1
             P *= ((self.nobs - j) / (nobs - j + 1)) ** self.d[j - 1]
         self.W_in[i - 1, 0] = P * self.d[i - 1] / (nobs - i + 1)
Ejemplo n.º 15
0
    def fit(self, data_predict=None):
        """
        Return the mean and marginal effects at the `data_predict` points.

        Parameters
        ----------
        data_predict : array_like, optional
            Points at which to evaluate.  If not given, ``self.exog`` is
            used.

        Returns
        -------
        mean : ndarray
            First element of the estimator output for each point.
        mfx : ndarray
            Marginal effects for each point, with shape
            ``(number of points, k_vars)``.
        """
        # The estimator is selected by the stored regression type.
        estimator = self.est[self.reg_type]
        points = (self.exog if data_predict is None
                  else _adjust_shape(data_predict, self.k_vars))

        n_points = np.shape(points)[0]
        mean = np.empty((n_points,))
        mfx = np.empty((n_points, self.k_vars))
        for row in xrange(n_points):
            # The stored ``self.W_in`` weights are handed to every call.
            result = estimator(self.bw, self.endog, self.exog,
                               data_predict=points[row, :],
                               W=self.W_in)
            mean[row] = result[0]
            mfx[row, :] = np.squeeze(result[1])

        return mean, mfx
Ejemplo n.º 16
0
    def cdf(self, endog_predict=None, exog_predict=None):
        r"""
        Cumulative distribution function for the conditional density.

        Parameters
        ----------
        endog_predict : array_like, optional
            The evaluation dependent variables at which the cdf is estimated.
            If not specified the training dependent variables are used.
        exog_predict : array_like, optional
            The evaluation independent variables at which the cdf is
            estimated.  If not specified the training independent variables
            are used.

        Returns
        -------
        cdf_est : ndarray
            The estimate of the cdf, one value per row of the independent
            evaluation data.

        Notes
        -----
        For more details on the estimation see [5], and p.181 in [1].

        The multivariate conditional CDF for mixed data (continuous and
        ordered/unordered discrete) is estimated by:

        .. math:: F(y|x)=\frac{n^{-1}\sum_{i=1}^{n}G(\frac{y-Y_{i}}{h_{0}})
                              W_{h}(X_{i},x)}{\widehat{\mu}(x)}

        where G() is the product kernel CDF estimator for the dependent (y)
        variable(s), W() is the product kernel for the independent
        variable(s), evaluated without CDF kernel types, and
        :math:`\widehat{\mu}(x)` is the summed kernel for the independent
        variable(s) divided by the number of observations.
        """
        endog_eval = (self.endog if endog_predict is None
                      else _adjust_shape(endog_predict, self.k_dep))
        exog_eval = (self.exog if exog_predict is None
                     else _adjust_shape(exog_predict, self.k_indep))

        # The number of evaluation points is taken from the independent data.
        n_eval = np.shape(exog_eval)[0]
        cdf_est = np.empty(n_eval)
        for row in xrange(n_eval):
            x_point = exog_eval[row, :]

            # Denominator: kernel estimate for the independent variables.
            mu_x = np.squeeze(
                gpke(self.bw[self.k_dep:], data=self.exog,
                     data_predict=x_point,
                     var_type=self.indep_type) / self.nobs)

            # Per-observation terms (``tosum=False``): CDF kernels for the
            # dependent part, default kernels for the independent part.
            kern_endog = gpke(self.bw[0:self.k_dep], data=self.endog,
                              data_predict=endog_eval[row, :],
                              var_type=self.dep_type,
                              ckertype="gaussian_cdf",
                              ukertype="aitchisonaitken_cdf",
                              okertype='wangryzin_cdf', tosum=False)
            kern_exog = gpke(self.bw[self.k_dep:], data=self.exog,
                             data_predict=x_point,
                             var_type=self.indep_type, tosum=False)

            weighted_sum = (kern_endog * kern_exog).sum(axis=0)
            cdf_est[row] = weighted_sum / (self.nobs * mu_x)

        return cdf_est
Ejemplo n.º 17
0
    def cdf(self, endog_predict=None, exog_predict=None):
        r"""
        Cumulative distribution function for the conditional density.

        Parameters
        ----------
        endog_predict : array_like, optional
            The evaluation dependent variables at which the cdf is estimated.
            If not specified the training dependent variables are used.
        exog_predict : array_like, optional
            The evaluation independent variables at which the cdf is
            estimated.  If not specified the training independent variables
            are used.

        Returns
        -------
        cdf_est : ndarray
            The estimate of the cdf, one value per row of the independent
            evaluation data.

        Notes
        -----
        For more details on the estimation see [5], and p.181 in [1].

        The multivariate conditional CDF for mixed data (continuous and
        ordered/unordered discrete) is estimated by:

        .. math:: F(y|x)=\frac{n^{-1}\sum_{i=1}^{n}G(\frac{y-Y_{i}}{h_{0}})
                              W_{h}(X_{i},x)}{\widehat{\mu}(x)}

        where G() is the product kernel CDF estimator for the dependent (y)
        variable(s), W() is the product kernel for the independent
        variable(s), evaluated without CDF kernel types, and
        :math:`\widehat{\mu}(x)` is the summed kernel for the independent
        variable(s) divided by the number of observations.
        """
        endog_eval = (self.endog if endog_predict is None
                      else _adjust_shape(endog_predict, self.k_dep))
        exog_eval = (self.exog if exog_predict is None
                     else _adjust_shape(exog_predict, self.k_indep))

        # The number of evaluation points is taken from the independent data.
        n_eval = np.shape(exog_eval)[0]
        cdf_est = np.empty(n_eval)
        for row in xrange(n_eval):
            x_point = exog_eval[row, :]

            # Denominator: kernel estimate for the independent variables.
            mu_x = np.squeeze(
                gpke(self.bw[self.k_dep:], data=self.exog,
                     data_predict=x_point,
                     var_type=self.indep_type) / self.nobs)

            # Per-observation terms (``tosum=False``): CDF kernels for the
            # dependent part, default kernels for the independent part.
            kern_endog = gpke(self.bw[0:self.k_dep], data=self.endog,
                              data_predict=endog_eval[row, :],
                              var_type=self.dep_type,
                              ckertype="gaussian_cdf",
                              ukertype="aitchisonaitken_cdf",
                              okertype='wangryzin_cdf', tosum=False)
            kern_exog = gpke(self.bw[self.k_dep:], data=self.exog,
                             data_predict=x_point,
                             var_type=self.indep_type, tosum=False)

            weighted_sum = (kern_endog * kern_exog).sum(axis=0)
            cdf_est[row] = weighted_sum / (self.nobs * mu_x)

        return cdf_est