import numpy as np
import statsmodels.stats.api as sms


def is_autocorrelated(ts, verbose=False):
    # Rule of thumb: assume no correlation for DW in 1.8-2.2
    # (https://web.stanford.edu/~clint/bench/dw05d.htm)
    dw_stat = sms.durbin_watson(ts)
    # statsmodels >= 0.12 returns a DataFrame; take the lag-1 p-value
    lb_pvalue = sms.acorr_ljungbox(ts, lags=[1])["lb_pvalue"].iloc[0]
    dw = abs(dw_stat - 2) > 0.2
    alb = lb_pvalue <= 0.05
    if verbose:
        print("\nTest for Autocorrelation:")
        # 0 = pos. autocorrelated, 1.5-2.5 = no correlation (rule of thumb),
        # 4 = neg. autocorrelated
        # https://web.stanford.edu/~clint/bench/dwcrit.htm
        # For 700 samples: dL and dU around 1.8
        print(f">Durbin-Watson (null(2) = no autocorr., lag 1): statistic"
              f" = {dw_stat:.4f}")
        print(f">Ljung-Box-Q (null = no autocorr., lag 1): p value"
              f" = {lb_pvalue:.2f}")
    # For a statsmodels results object (e.g. from OLS()), one could also use
    # sms.acorr_breusch_godfrey(results, nlags=2)
    # Fraction of the two tests that indicate autocorrelation (0.0, 0.5, 1.0)
    return np.sum([dw, alb]) / 2.0
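
# Usage sketch (illustrative, not from the original source; the variable
# names below are hypothetical): a white-noise series should score ~0.0,
# while a random walk should score 1.0, since both tests flag it.
rng = np.random.default_rng(42)
white_noise = rng.normal(size=700)
random_walk = np.cumsum(rng.normal(size=700))
print(is_autocorrelated(white_noise))                # expect 0.0
print(is_autocorrelated(random_walk, verbose=True))  # expect 1.0
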
import pandas as pd
import statsmodels.api as sm
import statsmodels.stats.api as sms
from dash import html
from dash.exceptions import PreventUpdate


def show_regression(n_clicks, value):
    # `database` and `generate_table` are defined elsewhere in the app
    if value is None:
        raise PreventUpdate
    if len(value) < 2:
        return html.P('You need to select more than one variable.'), None, None
    else:
        reg_df = pd.DataFrame(database[value[0]])
        for i in range(1, len(value)):
            reg_df = pd.concat(
                [reg_df, pd.DataFrame(database[value[i]])], axis=1)
        reg_df.columns = value       # add column names
        reg_df.dropna(inplace=True)  # drop NAs
        y = reg_df[value[0]]         # extract the dependent variable
        x = reg_df[value[1:]]        # extract the independent variables
        X = sm.add_constant(x)       # add a constant
        # Create OLS regression with heteroskedasticity-robust (HC3) errors
        model = sm.OLS(y, X).fit(cov_type='HC3')
        # Extract various OLS regression summary statistics
        model_df1 = pd.DataFrame(
            {
                'nobs': model.nobs,
                'Adj_R-square': round(model.rsquared_adj, 3),
                'DW_test': round(sms.durbin_watson(model.resid), 3)
            },
            index=[0])
        # Row labels: the constant first, then the regressors in selection order
        names = ['constant'] + value[1:]
        # Fill out the rest of the table to be displayed
        model_df2 = pd.DataFrame.from_dict({
            'regressors': names,
            'coefficients': [round(b, 3) for b in model.params],
            'T-stats': [round(t, 3) for t in model.tvalues]
        })
        explanation = '''
        Below are two tables showing the regression output.
        The first table shows the number of observations, the adjusted \
        R-squared, and the Durbin-Watson statistic for serial correlation.
        The second table shows the independent variables entered in the \
        order selected, with the constant displayed first. \
        The second column displays the beta coefficients, and \
        the third column shows the T-statistics for a two-tailed test.
        '''
        return generate_table(model_df1), generate_table(
            model_df2), explanation
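
# Wiring sketch (assumed, not from the original source): how show_regression
# might be registered as a Dash callback. The layout and component ids below
# are hypothetical; `database` and `generate_table` come from the app context.
import dash
from dash import dcc, Input, Output, State

app = dash.Dash(__name__)
app.layout = html.Div([
    dcc.Dropdown(id='variable-dropdown',
                 options=[{'label': c, 'value': c} for c in database.columns],
                 multi=True),
    html.Button('Run regression', id='run-button'),
    html.Div(id='table-1'),
    html.Div(id='table-2'),
    html.Div(id='explanation'),
])

app.callback(
    [Output('table-1', 'children'),
     Output('table-2', 'children'),
     Output('explanation', 'children')],
    Input('run-button', 'n_clicks'),
    State('variable-dropdown', 'value'),
)(show_regression)
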
import json

import numpy as np
import statsmodels.stats.api as sms
from statsmodels.compat import lzip


def diagnostic_plots(self, linear_model):
    """
    :param linear_model: linear model fit on the data
    :return: the model summary and a JSON string of the diagnostic results

    This method validates the assumptions of the linear model.
    """
    diagnostic_result = {}
    summary = linear_model.summary()
    # diagnostic_result['summary'] = str(summary)

    # fitted values
    fitted_y = linear_model.fittedvalues
    # model residuals
    residuals = linear_model.resid
    # internally studentized (normalized) residuals
    residuals_normalized = linear_model.get_influence().resid_studentized_internal
    # square root of the absolute normalized residuals
    model_norm_residuals_abs_sqrt = np.sqrt(np.abs(residuals_normalized))
    # leverage, from statsmodels internals
    leverage = linear_model.get_influence().hat_matrix_diag
    # Cook's distance, from statsmodels internals
    cooks = linear_model.get_influence().cooks_distance[0]

    self.check_linearity_assumption(fitted_y, residuals)
    self.check_residual_normality(residuals_normalized)
    self.check_homoscedacticity(fitted_y, model_norm_residuals_abs_sqrt)
    self.check_influcence(leverage, cooks, residuals_normalized)

    # 1. Non-Linearity Test (Harvey-Collier returns a t statistic, not an F)
    try:
        name = ['t value', 'p value']
        test = sms.linear_harvey_collier(linear_model)
        linear_test_result = lzip(name, test)
    except Exception as e:
        linear_test_result = str(e)
    diagnostic_result['Non_Linearity_Test'] = linear_test_result

    # 2. Heteroskedasticity Test
    name = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
    test = sms.het_breuschpagan(linear_model.resid, linear_model.model.exog)
    diagnostic_result['Heteroskedasticity_Test'] = lzip(name, test)

    # 3. Normality of Residuals
    name = ['Jarque-Bera', 'Chi^2 two-tail prob.', 'Skew', 'Kurtosis']
    test = sms.jarque_bera(linear_model.resid)
    diagnostic_result['Residual_Normality_Test'] = lzip(name, test)

    # 4. Multicollinearity Test
    test = np.linalg.cond(linear_model.model.exog)
    diagnostic_result['Multicollinearity_Test'] = [('condition no', test)]

    # 5. Residuals Autocorrelation Test (Durbin-Watson is a statistic, not a
    # p-value; values near 2 indicate no autocorrelation)
    test = sms.durbin_watson(linear_model.resid)
    diagnostic_result['Residual_AutoCorrelation_Test'] = [('statistic', test)]

    json_result = json.dumps(diagnostic_result)
    return summary, json_result
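
# Usage sketch (assumed, not from the original source): fit an OLS model and
# run the diagnostics. `_Validator` is a hypothetical stub standing in for
# whatever class hosts diagnostic_plots; its check_* plotting helpers are
# no-ops here (names mirror the originals, typos included) so the JSON
# diagnostics can be exercised on their own.
import pandas as pd
import statsmodels.api as sm


class _Validator:
    diagnostic_plots = diagnostic_plots

    def check_linearity_assumption(self, *args): pass
    def check_residual_normality(self, *args): pass
    def check_homoscedacticity(self, *args): pass
    def check_influcence(self, *args): pass


df = pd.DataFrame({'y': [1.0, 2.1, 2.9, 4.2, 5.1, 5.8, 7.2, 7.9],
                   'x': [1, 2, 3, 4, 5, 6, 7, 8]})
fit = sm.OLS(df['y'], sm.add_constant(df['x'])).fit()
summary, diagnostics_json = _Validator().diagnostic_plots(fit)
print(diagnostics_json)
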