Example No. 1
import numpy as np
import statsmodels.stats.api as sms


def is_autocorrelated(ts, verbose=False):
    # Assume no correlation for Durbin-Watson values in 1.8-2.2 (https://web.stanford.edu/~clint/bench/dw05d.htm)
    dw_stat = sms.durbin_watson(ts)
    dw = abs(dw_stat - 2) > 0.2
    # Ljung-Box at lag 1; newer statsmodels returns a DataFrame, older versions a tuple of arrays
    lb = sms.acorr_ljungbox(ts, lags=[1])
    lb_pvalue = lb["lb_pvalue"].iloc[0] if hasattr(lb, "iloc") else lb[1][0]
    alb = lb_pvalue <= 0.05
    if verbose:
        print("\nTest for Autocorrelation:")
        # 0 = pos. autocorrelation, 1.5-2.5 = no correlation (rule of thumb), 4 = neg. autocorrelation
        # https://web.stanford.edu/~clint/bench/dwcrit.htm -- for 700 samples, dL and dU are around 1.8
        print(f">Durbin-Watson (null(2) = no autocorr., lag 1): statistic = {dw_stat:.4f}")
        print(f">Ljung-Box-Q (null = no autocorr., lag 1): p value = {lb_pvalue:.2f}")
        # Requires a statsmodels results object, e.g. from OLS(): sms.acorr_breusch_godfrey(res, nlags=2)
    # 0.0 = neither test flags autocorrelation, 0.5 = one of the two does, 1.0 = both do
    return np.sum([dw, alb]) / 2.0
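
A short usage sketch for the function above; the data and the OLS fit are purely illustrative:

import numpy as np
import statsmodels.api as sm

# Illustrative data: fit a simple OLS model and test its residuals for autocorrelation
rng = np.random.default_rng(0)
x = rng.normal(size=500)
y = 2.0 * x + rng.normal(size=500)
resid = sm.OLS(y, sm.add_constant(x)).fit().resid

score = is_autocorrelated(resid, verbose=True)
print(f"Autocorrelation score: {score}")  # 0.0, 0.5 or 1.0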
Example No. 2
import pandas as pd
import statsmodels.api as sm
import statsmodels.stats.api as sms
from dash import html  # Dash >= 2.x import style
from dash.exceptions import PreventUpdate

# `database` (the source of the selectable series, e.g. a pandas DataFrame) and
# `generate_table` (a helper that renders a DataFrame as a Dash table) are defined
# elsewhere in the app.


def show_regression(n_clicks, value):
    if value is None:
        raise PreventUpdate
    if len(value) < 2:
        return html.P('You need to select more than one variable.'), None, None
    else:
        reg_df = pd.DataFrame(database[value[0]])
        for i in range(1, len(value)):
            reg_df = pd.concat(
                [reg_df, pd.DataFrame(database[value[i]])], axis=1)
        reg_df.columns = value  # add column names
        reg_df.dropna(inplace=True)  # drop NAs
        y = reg_df[value[0]]  # extract the dependent variable
        x = reg_df[value[1:]]  # extract the independent variables
        X = sm.add_constant(x)  # add a constant

        # Create OLS regression
        model = sm.OLS(y, X).fit(cov_type='HC3')

        # Extract various OLS regression summary statistics
        model_df1 = pd.DataFrame(
            {
                'nobs': model.nobs,
                'Adj_R-square': round(model.rsquared_adj, 3),
                'DW_test': round(sms.durbin_watson(model.resid), 3)
            },
            index=[0])

        # Give name to row in table
        names = ['constant'] + value[1:]
        # Fill out the rest of the table to be displayed
        model_df2 = pd.DataFrame.from_dict({
            'regressors': names,
            'coefficients': [round(coef, 3) for coef in model.params],
            'T-stats': [round(t, 3) for t in model.tvalues]
        })
        explanation = '''
           Below are two tables showing the regression output.
           The first table shows the number of observations, the adjusted \
           R-squared, and the Durbin-Watson statistic for serial correlation.

           The second table shows the independent variables, entered in the \
           order selected, with the constant displayed first. \
           The second column displays the beta coefficients, and \
           the third column shows the T-statistics for a two-tailed test.
           '''
        return generate_table(model_df1), generate_table(
            model_df2), explanation
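
The callback above is shown without its registration code. Below is a minimal wiring sketch, assuming Dash 2.x, a pandas DataFrame `database`, and illustrative component ids (`variable-dropdown`, `run-button`, and the three output divs); the simple `generate_table` helper is also an assumption, not the original app's implementation:

from dash import Dash, Input, Output, dash_table, dcc, html


def generate_table(df):
    # Illustrative helper: render a DataFrame as a Dash DataTable
    return dash_table.DataTable(
        columns=[{"name": c, "id": c} for c in df.columns],
        data=df.to_dict("records"),
    )


app = Dash(__name__)
app.layout = html.Div([
    dcc.Dropdown(id="variable-dropdown", multi=True,
                 options=[{"label": c, "value": c} for c in database.columns]),
    html.Button("Run regression", id="run-button"),
    html.Div(id="regression-summary"),
    html.Div(id="regression-coefficients"),
    html.Div(id="regression-explanation"),
])

# Register show_regression: the three Outputs match the three values it returns
app.callback(
    Output("regression-summary", "children"),
    Output("regression-coefficients", "children"),
    Output("regression-explanation", "children"),
    Input("run-button", "n_clicks"),
    Input("variable-dropdown", "value"),
)(show_regression)

if __name__ == "__main__":
    app.run_server(debug=True)  # Dash 2.x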
Example No. 3
    # Module-level imports assumed by this method: json, numpy as np,
    # statsmodels.stats.api as sms, and lzip (from statsmodels.compat.python import lzip).
    def diagnostic_plots(self, linear_model):
        """
        Validate the assumptions of a linear model with diagnostic plots and tests.

        :param linear_model: fitted linear model (e.g. a statsmodels OLS results object)
        :return: tuple of (model summary, JSON string with the diagnostic test results)
        """
        diagnostic_result = {}

        summary = linear_model.summary()
        #diagnostic_result['summary'] = str(summary)

        # fitted values
        fitted_y = linear_model.fittedvalues
        # model residuals
        residuals = linear_model.resid

        # influence measures, computed once from statsmodels internals
        influence = linear_model.get_influence()

        # internally studentized (normalized) residuals
        residuals_normalized = influence.resid_studentized_internal

        # square root of the absolute normalized residuals
        model_norm_residuals_abs_sqrt = np.sqrt(np.abs(residuals_normalized))

        # leverage (diagonal of the hat matrix)
        leverage = influence.hat_matrix_diag

        # Cook's distance
        cooks = influence.cooks_distance[0]

        self.check_linearity_assumption(fitted_y, residuals)

        self.check_residual_normality(residuals_normalized)

        self.check_homoscedacticity(fitted_y, model_norm_residuals_abs_sqrt)

        self.check_influcence(leverage, cooks, residuals_normalized)

        # 1. Non-Linearity Test (Harvey-Collier returns a t statistic and its p value)
        try:
            name = ['t value', 'p value']
            test = sms.linear_harvey_collier(linear_model)
            linear_test_result = lzip(name, test)
        except Exception as e:
            linear_test_result = str(e)
        diagnostic_result['Non_Linearity_Test'] = linear_test_result

        # 2. Heteroskedasticity Test (Breusch-Pagan)
        name = ['Lagrange multiplier statistic', 'p-value',
                'f-value', 'f p-value']
        test = sms.het_breuschpagan(linear_model.resid, linear_model.model.exog)
        test_val = lzip(name, test)
        diagnostic_result['Hetroskedasticity_Test'] = test_val

        # 3. Normality of Residuals
        name = ['Jarque-Bera', 'Chi^2 two-tail prob.', 'Skew', 'Kurtosis']
        test = sms.jarque_bera(linear_model.resid)
        test_val = lzip(name, test)
        diagnostic_result['Residual_Normality_Test'] = test_val

        # 4. Multicollinearity Test (condition number of the design matrix)
        test = np.linalg.cond(linear_model.model.exog)
        test_val = [('condition no',test)]
        diagnostic_result['MultiCollnearity_Test'] = test_val

        # 5. Residuals Auto-Correlation Test (Durbin-Watson; returns a statistic, not a p value)
        test = sms.durbin_watson(linear_model.resid)
        test_val = [('statistic', test)]
        diagnostic_result['Residual_AutoCorrelation_Test'] = test_val

        # default=str keeps numpy scalars, which are not natively JSON serializable, from raising
        json_result = json.dumps(diagnostic_result, default=str)
        return summary, json_result
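
A usage sketch for the method above, assuming it lives on a class (called `LinearModelDiagnostics` here purely for illustration) that also defines the `check_*` plotting helpers it calls:

import numpy as np
import statsmodels.api as sm

# Illustrative data and OLS fit
rng = np.random.default_rng(42)
X = sm.add_constant(rng.normal(size=(200, 2)))
y = X @ np.array([1.0, 0.5, -0.3]) + rng.normal(size=200)
linear_model = sm.OLS(y, X).fit()

diagnostics = LinearModelDiagnostics()  # hypothetical class containing diagnostic_plots()
summary, json_result = diagnostics.diagnostic_plots(linear_model)
print(summary)
print(json_result)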