예제 #1
0
    def fit(
        self,
        X: Union[pd.DataFrame, np.ndarray],
        y: Union[pd.DataFrame, np.ndarray, Iterable],
    ) -> skbase.ClassifierMixin:
        """Fit the underlying kernel regression on the training data.

        Parameters
        ----------
        X
            Training features; validated by ``self._check_X_y_fit``.
        y
            Training labels; validated by ``self._check_X_y_fit``.

        Returns
        -------
        self
            The estimator, with ``classes_``, ``nfeatures_`` and ``model_``
            set.
        """
        X, y = self._check_X_y_fit(X, y)
        self.classes_ = np.unique(y)
        self.nfeatures_ = X.shape[1]

        # A single float bandwidth is repeated once per feature; any other
        # value is handed to KernelReg exactly as configured.
        if isinstance(self.bw, float):
            bandwidth = np.full(self.nfeatures_, self.bw)
        else:
            bandwidth = self.bw

        # NOTE(review): ``y * 2 - 1`` presumably maps 0/1 labels onto -1/+1
        # regression targets -- confirm against _check_X_y_fit.
        self.model_ = smkernel.KernelReg(
            endog=y * 2 - 1,
            exog=X,
            var_type='c' * self.nfeatures_,
            reg_type=self.reg_type,
            bw=bandwidth,
            defaults=smkernelbase.EstimatorSettings(efficient=False),
        )
        return self
예제 #2
0
def justfit(W, X, name, title):
    """Plot a kernel-regression fit of every column of ``W`` against ``X``.

    One subplot is drawn per column of ``W``, stacked vertically with a
    shared x axis, and the figure is written to ``name + '.png'``.

    Parameters
    ----------
    W : 2-D array
        Responses; ``W.shape[1]`` gives the number of panels and ``W.T[i]``
        is the response plotted in panel ``i``.
    X : 1-D array
        Regressor shared by all panels. It is indexed with the result of
        ``np.argsort(X)``, so it must support integer-array indexing.
    name : str
        Output path without the ``.png`` extension.
    title : str
        Title placed on the first (top) panel.

    Notes
    -----
    The y-axis label of panel ``i`` is read from ``goods[i]``; ``goods`` is
    not a parameter and must exist at module level.
    """
    ngoods = W.shape[1]
    order = np.argsort(X)

    # squeeze=False always yields a 2-D array of axes. Without it,
    # plt.subplots returns a bare Axes when ngoods == 1 and indexing it
    # raises TypeError.
    f, axes = plt.subplots(ngoods, sharex=True, sharey=False, squeeze=False)
    ax = axes[:, 0]
    f.set_size_inches(20., 50.)
    ax[0].set_title(title)

    for i, panel in enumerate(ax):
        text_items = [panel.title, panel.xaxis.label, panel.yaxis.label]
        for item in (text_items + panel.get_xticklabels()
                     + panel.get_yticklabels()):
            item.set_fontsize(20)

        model = kreg.KernelReg(endog=W.T[i], exog=X, var_type='c')
        # fit() returns (fitted mean, marginal effects); only the mean is drawn.
        sm_mean, _ = model.fit()
        # Sort by X so the fitted curve is drawn left to right.
        panel.plot(X[order], sm_mean[order], '-r', alpha=1)
        panel.set_ylabel(goods[i])

    f.savefig(name + '.png')
예제 #3
0
def term2(x, y, z, gamma, kz, h):
    """Return the sample mean of a kernel-regression fit weighted by ``Kh``.

    ``phi_i((x[i], y[i], z[i]), gamma)`` is evaluated for every observation
    and regressed on ``z`` with a local-constant kernel regression
    (AIC-selected bandwidth). Each fitted value is multiplied by
    ``Kh(h, kz, z[i])`` and the mean of those products is printed and
    returned.

    Parameters
    ----------
    x, y, z : sequences
        Observations, indexed by position ``0 .. len(x) - 1``.
    gamma
        Passed through to ``phi_i``.
    kz, h
        Passed through to ``Kh``.

    Returns
    -------
    The mean over all observations of the weighted fitted values.

    Notes
    -----
    ``phi_i`` and ``Kh`` are helpers defined outside this block; their exact
    semantics are not visible here.
    """
    n = len(x)
    xps = [phi_i((x[i], y[i], z[i]), gamma) for i in range(n)]

    nw = kr.KernelReg(xps, [z], 'c', reg_type='lc', bw='aic')
    # fit() returns (fitted mean, marginal effects); only the mean is used.
    means, _ = nw.fit([z])

    for i in range(n):
        means[i] *= Kh(h, kz, z[i])

    # Average over every observation. The previous code took
    # means[i].mean() with the leaked loop index, which is just the last
    # weighted value.
    result = means.mean()
    print('E(phi(X|Z)*pdf(Z)) = ', result)
    return result
예제 #4
0
##################################################

# The quadratic specification may still be wrong: the true relationship
# could follow some other nonlinear shape.
#
# A nonparametric estimator lets the data trace out that shape, which helps
# decide what functional form to use.
#
# KernelReg takes the response (prices) and the regressor (horsepower) as
# separate arrays.

y = tractors['log_saleprice']
X = tractors['horsepower']

# Set up the kernel regression with a single continuous ('c') regressor.
kde_reg = npreg.KernelReg(endog=y, exog=X, var_type='c')

# Evaluate the fitted regression on an evenly spaced horsepower grid.
X_grid = np.arange(0, 500, 10)
kde_pred = kde_reg.fit(data_predict=X_grid)

# Scatter the observations and overlay the fitted curve; kde_pred[0] holds
# the predicted values on the grid.
fig, ax = plt.subplots()
ax.plot(X, y, '.', alpha=0.5)
ax.plot(X_grid, kde_pred[0], '-', color='tab:blue', alpha=0.9)
plt.show()

#--------------------------------------------------
# Tuning the bandwidth
#--------------------------------------------------
예제 #5
0
def iterative_wls(x, y, tol=1e-6, max_iter=100):
    """Fit a linear regression by iteratively re-weighted least squares.

    Starting from an unweighted fit, each pass smooths the log squared
    residuals against the predictor with a kernel regression, exponentiates
    the smoothed values to obtain a variance estimate per observation, and
    refits the line using the inverse variances as weights. (This is
    computationally intensive!)

    Parameters
    ----------
    x : array-like
        Predictor values: a vector of size n, or an n*k array with one
        column per predictor.
    y : array-like
        Target variable of size n.
    tol : float
        Iteration stops once the largest absolute change in any coefficient
        between two successive fits is no greater than this. Defaults to
        1e-6.
    max_iter : int
        Maximum number of re-weighting passes. Default is 100. One pass is
        always run, even if ``max_iter`` is smaller than 1.

    Returns
    -------
    coefs : array-like
        Coefficients of the final weighted fit, intercept first.
    """
    predictor = np.asarray(x)
    n_predictors = 1 if predictor.ndim == 1 else predictor.shape[1]

    # Prepend the intercept column once; the design matrix does not change
    # between iterations.
    design = sm.add_constant(np.c_[np.ones(len(predictor)), predictor])

    iteration = 0
    old_coefs = None

    # ----------------------------------------
    # Unweighted fit for the initial estimates
    # ----------------------------------------
    regression = sm.WLS(y, design).fit()
    coefs = regression.params

    while old_coefs is None or (np.max(abs(coefs - old_coefs)) > tol and
                                iteration < max_iter):

        # A residual can be exactly 0, in which case its log is undefined.
        # Suppress the resulting warnings and substitute 1e-12 for the
        # log squared residual of such points.
        with np.errstate(divide='ignore', invalid='ignore'):
            old_coefs = coefs

            squared_residuals = np.asarray(regression.resid) ** 2
            log_squared_residuals = np.where(squared_residuals > 0,
                                             np.log(squared_residuals),
                                             1e-12)

            # Estimate the conditional mean of the log squared residuals
            # given the predictor. The response here is the residual series,
            # not y.
            model = nparam_kreg.KernelReg(endog=log_squared_residuals,
                                          exog=predictor,
                                          var_type='c' * n_predictors)

            # Exponentiate to get the variance; its inverse is the weight.
            weights = np.exp(model.fit()[0]) ** -1

            # -------------------------------
            # Update regression coefficients
            # -------------------------------
            regression = sm.WLS(y, design, weights=weights).fit()
            coefs = regression.params
            iteration += 1

    return coefs
    SE = []
    for i in range(len(X)):
        X_1 = np.delete(X, i)
        Y_1 = np.delete(Y, i)
        X_pre, Y_pre = main(X_1, Y_1, h)
        Y_p = Y_pre[find_nearest(X_pre, X[i])]
        SE.append((Y_p - Y[i]) ** 2)
    MSE.append(np.mean(SE))
# Report the position of the smallest entry in MSE.
print(MSE.index(min(MSE)))

# Author's note: the best h found is 0.686, so refit with that value.
X_pre, Y_pre = main(X, Y, 0.686)

plt.figure(figsize=(10, 6))
plt.plot(X, Y, 'og')
plt.plot(X_pre, Y_pre, color='blue')
plt.xlabel('Crime Rate')
plt.ylabel('Price')
plt.legend(['Data', 'Pred'])
plt.show()

# Alternatively, use a ready-made kernel regression package for Python.
model = kernel_regression.KernelReg(
    endog=Y,
    exog=[X],
    var_type='c',
    reg_type="ll",
    bw='cv_ls',
    ckertype='gaussian',
)
# fit() returns a tuple; element 0 holds the predictions at X.
pred = model.fit(X)[0]

plt.figure(figsize=(10, 6))
plt.scatter(X, Y, color='green')
plt.scatter(X, pred, color='blue')
plt.xlabel('Crime Rate')
plt.ylabel('Price')
plt.legend(['Data', 'Pred'])
plt.show()