Python WLS Examples, statsmodels.formula.api.WLS Python Examples

Example #1

0

Show file

def forecast_mlr(_data,_col,predict,option,session_id):
    """Fit a regression on all but the held-out rows and predict every row.

    Parameters
    ----------
    _data : pandas.DataFrame
        Frame holding the explanatory columns and the target column.
    _col : list
        Names of the explanatory columns.
    predict : str
        Name of the target column.
    option : sequence
        Read positionally:
        ``option[0]`` number of trailing rows excluded from the fit,
        ``option[1]`` estimator name: ``'ols'``, ``'wls'`` or ``'glm_po'``,
        ``option[2]`` model label stored on the ``SummaryModel`` row,
        ``option[3]`` ``1`` to fit without a constant term; any other value
        adds one.
    session_id
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple
        ``(data_stat, data_stat_preview, data_ori)``. ``data_stat`` is always
        an empty frame; the other two each hold the columns ``'index'``,
        ``'original'`` and ``'predict'`` for every row of ``_data``.

    Raises
    ------
    ValueError
        If ``option[1]`` is not one of the supported estimator names.

    Side effects
    ------------
    Writes the fit summary to ``UPLOADE_DIR + '/temp/mlr/text/summary.txt'``
    and saves one ``SummaryModel`` row.
    """
    holdout = int(option[0])
    estimate_method = option[1]

    # ``_data[:-0]`` is an empty slice, so a zero holdout has to be handled
    # explicitly to keep every row in the training set.
    _data_model = _data[:-holdout] if holdout else _data

    # Model inputs (missing values are treated as 0).
    X = _data_model[_col].fillna(0)
    if int(option[3]) != 1:
        X = sm.add_constant(X)  # include a constant term unless disabled
    Y = _data_model[predict].fillna(0)
    X_all = _data[_col].fillna(0)
    Y_all = _data[predict].fillna(0)

    if estimate_method == 'ols':
        model = smf.OLS(Y, X)
    elif estimate_method == 'wls':
        model = smf.WLS(Y, X)
    elif estimate_method == 'glm_po':
        model = smf.GLM(Y, X, family=sm.families.Poisson())
    else:
        raise ValueError(
            "Unsupported estimate method: %r (expected 'ols', 'wls' or "
            "'glm_po')" % (estimate_method,)
        )

    # Fit the prediction model.
    result = model.fit()

    # Save the summary as text; the context manager closes the file even if
    # the write fails.
    with open( UPLOADE_DIR +'/temp/mlr/text/summary.txt', 'w' ) as f:
        f.write( str(result.summary()) )

    # Predicted values: sum of coefficient * regressor over all rows.
    # NOTE(review): for 'glm_po' this is the linear predictor, not its
    # exponential -- confirm that is what the preview is meant to show.
    Y_pre = pd.DataFrame(columns = [])
    for i in _col:
        Y_pre[i] = result.params[i]*X_all[i]
    pred_df = Y_pre.sum(axis = 1)
    # Add the intercept only when a constant was actually fitted. Indexing
    # ``params[0]`` unconditionally would add the first slope coefficient
    # whenever the model was fitted without a constant.
    if 'const' in result.params.index and 'const' not in _col:
        pred_df = pred_df + result.params['const']

    # NOTE(review): GLM results may not expose rsquared / rsquared_adj --
    # confirm the 'glm_po' path reaches this point without an AttributeError.
    insert_data = SummaryModel(model = option[2],method = estimate_method,aic=round(result.aic,3),bic=round(result.bic,3),rsq=round(result.rsquared,3),rsq_adj=round(result.rsquared_adj,3),holdout = holdout)
    insert_data.save()

    # Original vs. predicted values, used by both the preview and the result
    # chart.
    # NOTE(review): the Series below are aligned on a 0..n-1 index; this
    # assumes ``_data`` carries a default RangeIndex -- confirm with callers.
    data_rsp = pd.DataFrame(columns = [])
    data_rsp['index'] = list(range(len(_data)))
    data_rsp['original'] = Y_all
    data_rsp['predict'] = pred_df

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # equivalent call and yields two independent frames.
    data_stat = pd.DataFrame(columns = [])
    data_stat_preview = pd.concat([pd.DataFrame(columns = []), data_rsp])
    data_ori = pd.concat([pd.DataFrame(columns = []), data_rsp])
    return data_stat,data_stat_preview,data_ori

Example #2

0

Show file

 # Look up the YM entry 61 positions and the RF entry 60 positions past `pos`.
 nextYM = list(mreturnP.YM)[pos + 61]
 currRF = list(mreturnP.RF)[pos + 60]
 
 for tkt in common_tickers:
     stockReturn = currReturn[currReturn.YM.isin(mRets.YM)][tkt]
     # Drop every row that has a NaN in either the stock or the factor data.
     combined = pd.concat([stockReturn,mRets], axis =1)
     cleaned = combined.dropna(axis=0)
     if len(cleaned) == 0:
         # Nothing to regress for this ticker. Skipping here also avoids
         # re-appending the previous ticker's `params` row to `beta`.
         continue
     betaReturn = cleaned[tkt]-cleaned['RF']
     excess = betaReturn.values
     # One weight per observation.
     # NOTE(review): `1/23` is integer division (0) under Python 2 -- confirm
     # the intended interpreter and weighting scheme.
     aweights = [math.pow(0.5, math.pow((1/23), x) ) for x in range(0, len(cleaned) )]
     # Single-regressor weighted least squares fit (no constant) per factor.
     # NOTE(review): the 'CMA' column is fitted on RMW and the 'RF' column on
     # CMA; the labels look shifted -- confirm against downstream consumers
     # before renaming.
     params = pd.DataFrame ({
     'Ticker':[tkt],
     'YM': [currYM],
     'Mkt_RF': [sm.WLS(excess,cleaned['Mkt_RF'].values, weights = aweights).fit().params[0]],
     'SMB': [sm.WLS(excess,cleaned['SMB'].values, weights = aweights).fit().params[0]],
     'HML': [sm.WLS(excess,cleaned['HML'].values, weights = aweights).fit().params[0]],
     'CMA': [sm.WLS(excess,cleaned['RMW'].values, weights = aweights).fit().params[0]],
     'RF': [sm.WLS(excess,cleaned['CMA'].values, weights = aweights).fit().params[0]] })
     beta = pd.concat([beta, params])
 
 betas = pd.concat([betas, beta])
     
 # Per-ticker means for the current month, indexed by ticker.
 currLiquidity =  liquidity[liquidity.Ticker.isin(common_tickers)][liquidity.YM ==currYM].groupby('Ticker').mean()
 currLiquidity = currLiquidity.reset_index()
 currLiquidity.index = currLiquidity.Ticker
 # NOTE(review): the month mask is built from `equityPrice` while the frame
 # being filtered is `equityPriceC` -- confirm this is intentional.
 currPrice   = equityPriceC[equityPriceC.Ticker.isin(common_tickers)][equityPrice.YM ==currYM].groupby('Ticker').mean()
 currPrice = currPrice.reset_index()
 currPrice.index = currPrice.Ticker
 existingTickers = currPrice.Ticker.unique()

Example #3

0

Show file

File: irls.py Project: sauravn/MachineLearning-Python

def robust_irls_regression(x, y, c, penalty, max_iter=100, tol=1e-8):
    """Run a robust linear regression with iterated weighted least squares.

        Parameters
        ----------
        x : float
            predictor vector of size n*1
        y : float
            target variable of size n*1

        c : float
            Tuning hyperparameter passed to the weight function of the
            chosen penalty.

        penalty : string
            Type of penalty to be applied while doing the iterated weighted
            least squares (IWLS) regression. Must be 'Huber' or 'Tukey'.

        max_iter : int
            Maximum number of iterations for IWLS. Default is 100

        tol : float
            tolerance level for norm of difference of successive estimates for
            coefficients of linear regression and robust estimates of spread
            of residuals. Defaults to 1e-8

        Returns
        -------
        coefs : float
            2*1 vector of coefficients of linear regression.

        Raises
        ------
        ValueError
            If penalty is not 'Huber' or 'Tukey'.
        """
    #--------------------------------------------
    # Reject a missing or unknown penalty before doing any work. An
    # unrecognised name would otherwise leave the initial weights in place
    # for every iteration and silently return a non-robust fit.
    #--------------------------------------------
    if penalty not in ('Huber', 'Tukey'):
        raise ValueError("Specify either 'Huber' or 'Tukey' penalty!")
    weight_function = huber_weight if penalty == 'Huber' else tukey_weight

    x = np.c_[np.ones(len(x)), x]  # append column vector of 1's

    #------------------------------------------
    # Initial co-efficients from an unweighted fit.
    #------------------------------------------
    results = smf.WLS(y, sm.add_constant(x)).fit()
    coefs = results.params

    old_coefs = coefs
    residuals = results.resid

    #--------------------------------------------
    # Initial estimate for spread of residuals
    #--------------------------------------------
    robust_sd = sm_scale.mad(residuals)
    old_robust_sd = robust_sd

    #--------------------------------------------
    # Initialize weights as inverse squared residuals.
    # NOTE(review): an exactly-zero initial residual yields an infinite
    # weight here -- confirm whether inputs can fit perfectly.
    #--------------------------------------------
    weights = np.diag(1.0 / (residuals**2))

    for iteration in range(max_iter):

        #-------------------------------
        # Update regression coefficients by solving the weighted normal
        # equations (X' W X) b = X' W y.
        #-------------------------------
        weighted_xt = np.dot(x.T, weights)
        coefs = LA.solve(np.dot(weighted_xt, x), np.dot(weighted_xt, y))

        #--------------------------------------------
        # Update residuals
        #--------------------------------------------
        residuals = y - np.dot(coefs, x.T)

        #--------------------------------------------
        # Update robust measure of spread of residuals
        #--------------------------------------------
        robust_sd = sm_scale.mad(residuals)

        #--------------------------------------------
        # Standardize updated residuals
        #--------------------------------------------
        standardized_residuals = residuals / robust_sd

        #--------------------------------------------
        # Update weights
        #--------------------------------------------
        weights = np.diag(weight_function(standardized_residuals, c=c))

        #--------------------------------------------
        # Stop if estimates for co-efficients and spread of residuals
        # are stable.
        #--------------------------------------------
        if LA.norm(robust_sd - old_robust_sd) < tol and \
           LA.norm(coefs - old_coefs) < tol:
            break

        old_coefs = coefs
        old_robust_sd = robust_sd

    return coefs

Example #4

0

Show file

def iterative_wls(x, y, tol=1e-6, max_iter=100):

    """Run a weighted least squares linear regression with
       iterative refinement of variance. (This is computationally intensive!)

        Each pass models the log-squared residuals of the current fit as a
        smooth function of the predictor (kernel regression), exponentiates
        the fitted values to get a variance estimate, and refits with the
        inverse variances as weights.

        Parameters
        ----------
        x : float
            predictor vector of size n*1
        y : float
            target variable of size n*1
        tol : float
            tolerance on the largest absolute change between successive
            coefficient estimates. Defaults to 1e-6
        max_iter : int
            Maximum number of iterations for IWLS. Default is 100

        Returns
        -------
        coefs : float
            2*1 vector of coefficients of linear regression.
        """

    # Keep the raw predictor: the variance model is fitted against it, not
    # against the design matrix with its constant column.
    predictor = np.asarray(x)
    x = np.c_[np.ones(len(x)), x] # append column vector of 1's
    iteration = 0
    old_coefs = None

    #----------------------------------------
    # Run an unweighted fit to get initial estimates
    #----------------------------------------
    regression = sm.WLS(y, sm.add_constant(x)).fit()
    coefs = regression.params

    while old_coefs is None or (np.max(abs(coefs - old_coefs)) > tol and
                                iteration < max_iter):

        #----------------------------------------------------------------------
        # Construct the log-squared residuals and use a non-parametric
        # method (kernel regression) to estimate their conditional mean
        # given the predictor.
        # A residual can be 0, in which case the log-squared residual is not
        # defined; the warning is ignored and 1e-12 is substituted.
        #
        # Exponentiate to predict the variance and take inverse of the variance
        # as weights.
        #----------------------------------------------------------------------
        with np.errstate(divide='ignore', invalid='ignore'):

            old_coefs = coefs
            squared_residuals = regression.resid**2
            log_squared_residuals = np.where(squared_residuals > 0,
                                             np.log(squared_residuals),
                                             1e-12)
            # The log-squared residuals are the response (endog) and the
            # predictor is the regressor (exog); with the roles swapped the
            # fitted values would not estimate a log-variance.
            model = nparam_kreg.KernelReg(endog=log_squared_residuals,
                                          exog=predictor,
                                          var_type='c')
            weights = np.exp(model.fit()[0])**-1

            #-------------------------------
            # Update regression coefficients
            #-------------------------------
            regression = sm.WLS(y, sm.add_constant(x), weights=weights).fit()
            coefs = regression.params
            iteration += 1

    return coefs

Example #5

0

Show file

# NOTE(review): `lm` is used before the assignment further down, so it must
# already be bound earlier in the file (outside this excerpt) -- confirm.
data = np.matrix(df)
x, y = data[:, 1], data[:, 2]
lm.fit(x, y)

# Formula-based least squares fit of y on x.
lm = sm.ols(formula='y ~ x', data=df).fit()
print(lm.summary())  # call form works on Python 2 and Python 3 alike
exog = pd.DataFrame({'x': [10, 15]})
lm.predict(exog)
lm.resid
lm.params

# Weighted Least Squares
# NOTE(review): `w` is not defined in this excerpt -- it has to come from
# earlier in the file.
nsamp = df.shape[0]
Y = np.array(df['y'])
X = np.c_[np.ones(nsamp), np.array(df['x'])]  # design matrix with intercept
fm1 = sm.WLS(Y, X, weights=1 / w**2)
res_fm1 = fm1.fit()
res_fm1.summary()

# Plots
plt.figure()
plt.scatter(x, y)
x_range = arange(0, 20, .1)
exog = DataFrame({'x': x_range})
y_pred = lm.predict(exog)
plt.plot(x_range, y_pred)
plt.close()

# Q-Q plot of the residuals; the residuals are bound to `res`, so pass that
# name (the previous `resid` was never defined in this excerpt).
res = lm.resid
fig = sma.qqplot(res)
plt.show()