Example no. 1
def test_durbin_watson():
    #benchmark values from R car::durbinWatsonTest(x)
    #library("car")
    #> durbinWatsonTest(x)
    #[1] 1.95298958377419
    #> durbinWatsonTest(x**2)
    #[1] 1.848802400319998
    #> durbinWatsonTest(x[2:20]+0.5*x[1:19])
    #[1] 1.09897993228779
    #> durbinWatsonTest(x[2:20]+0.8*x[1:19])
    #[1] 0.937241876707273
    #> durbinWatsonTest(x[2:20]+0.9*x[1:19])
    #[1] 0.921488912587806
    st_R = 1.95298958377419
    assert_almost_equal(durbin_watson(x), st_R, 14)

    st_R = 1.848802400319998
    assert_almost_equal(durbin_watson(x**2), st_R, 14)

    st_R = 1.09897993228779
    assert_almost_equal(durbin_watson(x[1:] + 0.5 * x[:-1]), st_R, 14)

    st_R = 0.937241876707273
    assert_almost_equal(durbin_watson(x[1:] + 0.8 * x[:-1]), st_R, 14)

    st_R = 0.921488912587806
    assert_almost_equal(durbin_watson(x[1:] + 0.9 * x[:-1]), st_R, 14)
Example no. 2
def test_durbin_watson():
    #benchmark values from R car::durbinWatsonTest(x)
    #library("car")
    #> durbinWatsonTest(x)
    #[1] 1.95298958377419
    #> durbinWatsonTest(x**2)
    #[1] 1.848802400319998
    #> durbinWatsonTest(x[2:20]+0.5*x[1:19])
    #[1] 1.09897993228779
    #> durbinWatsonTest(x[2:20]+0.8*x[1:19])
    #[1] 0.937241876707273
    #> durbinWatsonTest(x[2:20]+0.9*x[1:19])
    #[1] 0.921488912587806
    st_R = 1.95298958377419
    assert_almost_equal(durbin_watson(x), st_R, 14)

    st_R = 1.848802400319998
    assert_almost_equal(durbin_watson(x**2), st_R, 14)

    st_R = 1.09897993228779
    assert_almost_equal(durbin_watson(x[1:]+0.5*x[:-1]), st_R, 14)

    st_R = 0.937241876707273
    assert_almost_equal(durbin_watson(x[1:]+0.8*x[:-1]), st_R, 14)

    st_R = 0.921488912587806
    assert_almost_equal(durbin_watson(x[1:]+0.9*x[:-1]), st_R, 14)
Example no. 3
def fit(y, X, reg_names):
    nr = len(reg_names)

    try:
        mod = sm.GLSAR(y.values, X, 2,
                       missing='drop')  # MLR analysis with AR2 modeling
        res = mod.iterative_fit()
        output = xr.Dataset({'coef': (['reg_name'], res.params[1:]), \
                'conf_int': (['reg_name', 'limit'], res.conf_int()[1:,:]), \
                'p_value': (['reg_name'],  res.pvalues[1:]), \
                'DWT': (sms.durbin_watson(res.wresid)), \
                'CoD': (res.rsquared)}, \
                coords = {'reg_name': (['reg_name'], reg_names),\
                          'limit': (['limit'], ['lower', 'upper'])})
    except Exception:
        nans = np.full([nr], np.nan)
        output = xr.Dataset({'coef': (['reg_name'], nans), \
                'conf_int': (['reg_name', 'limit'], np.array([nans, nans]).T), \
                'p_value': (['reg_name'],  nans), \
                'DWT': (np.nan), \
                'CoD': (np.nan)}, \
                coords = {'reg_name': (['reg_name'], reg_names),\
                          'limit': (['limit'], ['lower', 'upper'])})

    return output
Example no. 4
    def regression_analysis(self, y_column: str, *x_column: str) -> dict:
        """回归分析(OLS)

        :param y_column: y值所在的列名
        :param x_column: x值所在的列名
        :return: 字典,包括参数、检验结果
        """
        X_tuple = (np.array(self.data[x_column[0]]), )
        for i in range(1, len(x_column)):
            for column_info in self.meta:
                if "{}. {}".format(column_info['index'], column_info['title']) == x_column[i]:
                    if column_info['type'] in ['rate', 'scale', 'numInput']:  # continuous variables go straight into the matrix
                        X_tuple += (np.array(self.data[x_column[i]]), )
                    elif column_info['type'] in ["radio", "checkbox", "sort"]:  # categorical variables become dummies; the reference group is dropped before insertion
                        dummy = sm.categorical(np.array(self.data[x_column[i]]))
                        X_tuple += (dummy[:, 1:], )
                    break
        X = np.column_stack(X_tuple)
        X = sm.add_constant(X)
        y = np.array(self.data[y_column])
        model = sm.OLS(y, X)
        result = model.fit()
        result_dict = dict()
        result_dict['params'] = [round(i, 3) for i in result.params]  # [constant, x1, x2, ...]
        result_dict['tvalues'] = [round(i, 3) for i in result.tvalues]
        result_dict['pvalues'] = [round(i, 3) for i in result.pvalues]
        result_dict['rsquared'] = round(result.rsquared, 3)
        result_dict['rsquared_adj'] = round(result.rsquared_adj, 3)
        result_dict['fvalue'] = round(result.fvalue, 3)
        result_dict['f_pvalue'] = round(result.f_pvalue, 3)
        result_dict['DW'] = round(durbin_watson(result.wresid), 3)
        result_dict['condition_number'] = round(result.condition_number)
        if np.isnan(result_dict['f_pvalue']):
            return None
        return result_dict
Example no. 5
    def setup(self):
        self._calc_coefficients()
        self._setup_return_analysis_of_fund_and_fit(self.fitted_tms)
        self._setup_out_of_sample_fit()

        residuals = self.fit_model.resid
        self.durbin_watson_test = durbin_watson(residuals)

        regressors_df = self.input_data.regressors_df
        analysed_tms = self.input_data.analysed_tms
        self.risk_contribution = RiskContributionAnalysis.get_risk_contribution(
            regressors_df, self.coefficients, analysed_tms)

        factors_perf_attrib, unexplained_perf_attrib = ReturnAttributionAnalysis.get_factor_return_attribution(
            analysed_tms, self.fitted_tms, regressors_df, self.coefficients,
            self.intercept)
        self.factors_performance_attribution_ret = factors_perf_attrib
        self.unexplained_performance_attribution_ret = unexplained_perf_attrib

        self._setup_correlations(self.fitted_tms)
        self.condition_number = cond(regressors_df.values)
        self._setup_r_square_of_each_predictor()
        self._setup_autocorrelation(residuals)
        _, _, _, self.heteroskedasticity = het_breuschpagan(
            residuals, self.fit_model.model.exog)
        self._setup_cooks_distance(self.fit_model)
Example no. 6
def fit_sm(result, coordinates, to_fit, data, design, r, s, sortvar):
    for i in range(coordinates.shape[0]):
        if to_fit[i]:
            squared_distances = ((data[...,:3] - coordinates[i])**2).sum(axis=1)
            valid = np.where(squared_distances < r)
            if valid[0].size > 120:
                weights = np.exp(squared_distances[valid] / s)

                fit = sm.WLS(
                    endog    = data[valid][...,3],
                    exog     = design[valid],
                    weights  = weights,
                    hasconst = True).fit()

                result[i,0]   = fit.params
                result[i,1]   = fit.bse
                result[i,2]   = fit.tvalues
                result[i,3,0] = fit.mse_resid
                result[i,3,1] = fit.df_resid

                df = DataFrame({
                    'x'      : data[valid][...,0],
                    'y'      : data[valid][...,1],
                    'z'      : data[valid][...,2],
                    'time'   : data[valid][...,4]})
                df['weighted_residual'] = weights * fit.resid
                df.sort_values(by=sortvar, inplace=True)
                result[i,3,2] = durbin_watson(df.weighted_residual)
Example no. 7
def metrics(obs, pred, f, q, m):
    # obs - log(observed), pred - prediction, f - FIB, q - subset, m - model
    rsq = round(r2_score(obs, pred), 3)
    dw = round(durbin_watson(obs - pred), 3)  # Durbin-Watson
    rmse = round(np.sqrt(((pred - obs)**2).mean()),
                 3)  # Root Mean Square Error
    mape = 100 * round(abs(
        (pred - obs) / obs).mean(), 3)  # Mean Absolute Percentage Error
    sens_spec = wqm.pred_eval(obs, pred, thresh=np.log10(
        wqm.fib_thresh(f)))  # Sensitivity/Specificity
    auroc = round(HF_models.compute_AUROC(obs, pred, f),
                  3)  # Area Under the Receiver Operating Curve

    # Add to q performance for model m to perf dataframe
    mets = [[
        rsq, dw, rmse, mape, auroc, sens_spec['Sensitivity'],
        sens_spec['Specificity'], sens_spec['Samples'],
        sens_spec['Exceedances']
    ]]
    temp_perf = pd.DataFrame(data=mets,
                             columns=[
                                 'Rsq', 'D-W', 'RMSE', 'MAPE', 'AUROC', 'sens',
                                 'spec', 'N', 'exc'
                             ],
                             index=[[q], [m]])
    return temp_perf
Example no. 8
 def autocorrelation_assumption():
     '''
     Autocorrelation: Assumes that there is no autocorrelation in the residuals. If there is
                      autocorrelation, then there is a pattern that is not explained due to
                      the current value being dependent on the previous value.
                      This may be resolved by adding a lag variable of either the dependent
                      variable or some of the predictors.
     '''
     from statsmodels.stats.stattools import durbin_watson
     print('\n=======================================================================================')
     print('Assumption 4: No Autocorrelation')
     print('\nPerforming Durbin-Watson Test')
     print('Values of 1.5 < d < 2.5 generally show that there is no autocorrelation in the data')
     print('Values from 0 to <2 indicate positive autocorrelation')
     print('Values from >2 to 4 indicate negative autocorrelation')
     print('-------------------------------------')
     durbinWatson = durbin_watson(df_results['Residuals'])
     print('Durbin-Watson:', durbinWatson)
     if durbinWatson < 1.5:
         print('Signs of positive autocorrelation')
         print('\nAssumption not satisfied')
     elif durbinWatson > 2.5:
         print('Signs of negative autocorrelation')
         print('\nAssumption not satisfied')
     else:
         print('Little to no autocorrelation')
         print('\nAssumption satisfied')
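A minimal sketch illustrating the thresholds quoted above, assuming only numpy and statsmodels: an AR(1) series with a strongly positive coefficient scores well below 2, while white noise lands near 2.
import numpy as np
from statsmodels.stats.stattools import durbin_watson

rng = np.random.default_rng(0)
noise = rng.standard_normal(500)

# Build an AR(1) series with coefficient 0.8: strong positive autocorrelation.
ar1 = np.zeros(500)
for t in range(1, 500):
    ar1[t] = 0.8 * ar1[t - 1] + noise[t]

print(durbin_watson(noise))  # near 2: little to no autocorrelation
print(durbin_watson(ar1))    # well below 1.5: positive autocorrelation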
Example no. 9
 def test_durbin_watson_3d(self):
     shape = (10, 1, 10)
     x = np.random.standard_normal(100)
     dw = sum(np.diff(x)**2.0) / np.dot(x, x)
     x = np.tile(x[None, :, None], shape)
     assert_almost_equal(np.squeeze(dw * np.ones(shape)),
                         durbin_watson(x, axis=1))
Example no. 10
 def best_lag_dw(self, df, threshold=0.2):
     model = VAR(df, freq="MS")
     # Assumes stationary data.
     best_aic = float("inf")
     best_lag = None
     best_dw = None
     # Searching for best lag order.
     for i in range(1, 16):
         result = model.fit(i)
         #print("Lag order: ", i, " AIC: ", result.aic)
         # Checking with Durbin-Watson test for autocorrelation as well.
         dw_out = durbin_watson(result.resid)
         #print("DW test: ", dw_out)
         #print(abs(2.0-dw_out[0]))
         if ((result.aic < best_aic)
                 and (abs(2.0 - round(dw_out[0], 2)) <= threshold)
                 and (abs(2.0 - round(dw_out[1], 2)) <= threshold)):
             #print("ENTRA")
             best_aic = result.aic
             best_lag = i
             best_dw = dw_out
     print("Best lag order: ", best_lag, " with an AIC score of: ",
           best_aic)
     print("Durbin-Watson results:")
     for col, val in zip(df.columns, best_dw):
         print(col, ':', round(val, 2))
     print("-------------------------------------------------")
     return best_aic, best_lag, best_dw
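For context on the dw_out[0] / dw_out[1] indexing above: on the residual DataFrame of a multivariate VAR, durbin_watson returns one statistic per equation. A small sketch with synthetic two-column data (the column names y1/y2 are illustrative):
import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR
from statsmodels.stats.stattools import durbin_watson

rng = np.random.default_rng(1)
df = pd.DataFrame(rng.standard_normal((200, 2)), columns=['y1', 'y2'])  # stationary by construction

result = VAR(df).fit(2)
dw_out = durbin_watson(result.resid)  # array with one value per column
for col, val in zip(df.columns, dw_out):
    print(col, ':', round(val, 2))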
Example no. 11
def volatile_models_metrics(input_model):
    return {
        'AIC': input_model.aic,
        'BIC': input_model.bic,
        'R-squared': input_model.rsquared,
        'DW': durbin_watson(input_model.resid.dropna())
    }
Example no. 12
def autocorrelation_assumption(model, features, label):
    """
    Autocorrelation: Assumes that there is no autocorrelation in the residuals. If there is
                     autocorrelation, then there is a pattern that is not explained due to
                     the current value being dependent on the previous value.
                     This may be resolved by adding a lag variable of either the dependent
                     variable or some of the predictors.
    """
    print("Assumption 4: No Autocorrelation", "\n")

    # Calculating residuals for the Durbin-Watson test
    df_results = calculate_residuals(model, features, label)

    print("\nPerforming Durbin-Watson Test")
    print(
        "Values of 1.5 < d < 2.5 generally show that there is no autocorrelation in the data"
    )
    print("0 to 2< is positive autocorrelation")
    print(">2 to 4 is negative autocorrelation")
    print("-------------------------------------")
    durbinWatson = durbin_watson(df_results["Residuals"])
    print("Durbin-Watson:", durbinWatson)
    if durbinWatson < 1.5:
        print("Signs of positive autocorrelation", "\n")
        print("Assumption not satisfied")
    elif durbinWatson > 2.5:
        print("Signs of negative autocorrelation", "\n")
        print("Assumption not satisfied")
    else:
        print("Little to no autocorrelation", "\n")
        print("Assumption satisfied")
Example no. 13
def get_durbin_watson(cols, model):
    """
    Check for Serial Correlation of Residuals (Errors)
    """
    out = durbin_watson(model.resid)
    for col, val in zip(cols, out):
        print(col, ':', round(val, 2))
Example no. 14
def get_model_info(model):
    bg_lm, bg_lm_pval, bg_fval, bg_fpval = acorr_breusch_godfrey(model)
    jb, jb_pval, jb_skew, jb_kurtosis = jarque_bera(model.resid)
    het_bp_lm, het_bp_lmpval, het_bp_fval, het_bp_fpval = het_breuschpagan(
        model.resid, model.model.exog)

    return {
        'r_squared': model.rsquared,
        'adj_r_squared': model.rsquared_adj,
        'p_values': model.pvalues,
        'params': model.params,
        'std': model.bse,
        'size': model.nobs,
        't_values': model.tvalues,
        'durbin_watson': durbin_watson(model.resid),
        'breusch_godfrey': {
            'lm': bg_lm,
            'lm_pval': bg_lm_pval,
            'fval': bg_fval,
            'f_pval': bg_fpval
        },
        'jarque_bera': {
            'jb': jb,
            'jb_pval': jb_pval,
            'skew': jb_skew,
            'kurtosis': jb_kurtosis
        },
        'het_breuschpagan': {
            'lm': het_bp_lm,
            'lm_pval': het_bp_lmpval,
            'fval': het_bp_fval,
            'f_pval': het_bp_fpval
        },
        'residuals': model.resid
    }
Example no. 15
def test_autocorrelation(model_path, data_path):
    from statsmodels.stats.stattools import durbin_watson

    df_results = calculate_residuals(model_path, data_path)
    durbinWatson = durbin_watson(df_results['Residuals'])

    assert durbinWatson > 1.5
    assert durbinWatson < 2.5
Example no. 16
def dw_test(error):
    """
    The test statistic is approximately equal to 2*(1-r) where r is the sample autocorrelation of the residuals.
    Thus, for r == 0, indicating no serial correlation, the test statistic equals 2. This statistic will always be
    between 0 and 4. The closer to 0 the statistic, the more evidence for positive serial correlation. The closer to 4,
    the more evidence for negative serial correlation.
    """
    print('dw test', durbin_watson(error, axis=0))
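A quick numerical check of the 2*(1-r) relationship described in the docstring, assuming numpy and statsmodels:
import numpy as np
from statsmodels.stats.stattools import durbin_watson

rng = np.random.default_rng(42)
e = rng.standard_normal(1000)
e = e[1:] + 0.6 * e[:-1]  # induce positive serial correlation

r = np.corrcoef(e[:-1], e[1:])[0, 1]  # lag-1 sample autocorrelation
print(durbin_watson(e))  # approximately equal to...
print(2 * (1 - r))       # ...2*(1-r)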
Example no. 17
def checkdb(df, col):
    "Tells whether the data are serially correlated or not"
    r = k.durbin_watson(df[col], axis=0)
    # A statistic near 2 means little serial correlation; the original code
    # compared r to 0, which is the value for extreme positive correlation.
    if 1.5 < r < 2.5:
        print("Not serially correlated")
        return False, r
    else:
        print("Serially correlated")
        return True, r
Example no. 18
    def durbin_watson(self):
        """Performs Durbin-Watson test for checking autocorrelation of residuals"""
        if not self.is_fitted:
            print("Model not fitted yet!")
            return None
        from statsmodels.stats.stattools import durbin_watson
        test_score = float(durbin_watson(self.resid_))

        return round(test_score, 3)
Example no. 19
    def check(self, residuals, n):
        tStats = durbin_watson(residuals)
        # Note: the Durbin-Watson statistic is not t-distributed; this
        # p-value is only the author's rough approximation.
        pValue = stats.t.sf(np.abs(tStats), n - 1) * 2

        HypothesisTestObj = HypothesisTest(
            H0="""the residuals are not correlated""", pValue=pValue)
        #HypothesisTestObj.log()

        self.violation = 1 - HypothesisTestObj.result
Example no. 20
def dw(data_frame):
    """
    Take in a data frame use OLS to build the residuals

    Returns the Durbin-Watson Statistic, best value = 2.00
    """

    ols_res = OLS(data_frame, np.ones(len(data_frame))).fit()
    return durbin_watson(ols_res.resid)
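Regressing on a column of ones only removes the mean, so the helper above is equivalent to applying durbin_watson to the demeaned data. A small sketch, assuming numpy and statsmodels:
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.stats.stattools import durbin_watson

x = np.random.default_rng(7).standard_normal(100)
resid = OLS(x, np.ones(len(x))).fit().resid
assert np.allclose(resid, x - x.mean())  # regression on a constant just demeans
print(durbin_watson(resid))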
Example no. 21
def fit_at(formula, **kwargs):
    model, data = model_at(formula=formula, **kwargs)
    fit = model.fit()

    data['expected_signal'] = fit.predict()
    data['residual'] = fit.resid
    data['weighted_residual'] = data.weight * fit.resid
    fit.durbin_watson = durbin_watson(data.weighted_residual)

    return fit, model, data
Example no. 22
 def autocorrelation(self):
     """
     Assumes no autocorrelation of the error terms. The value should be between 1.5 and 2.5.
     < 1.5 = positive autocorr. > 2.5 = negative autocorr
     """
     dw = durbin_watson(self.results['Residuals'])
     if dw < 1.5:
         return "Assumption not met - Positive Autocorrelation", dw
     elif dw > 2.5:
         return "Assumption not met - Negative Autocorrelation", dw
     return "Assumption met", dw
Example no. 23
def generate_regression_dataframe(reg_dict):
    """
    From the dictionary containing the regressions, it extracts each regression's:
    - parameter coefficients, standard errors and p-values,
    - r squared and adjusted r squared
    - f statistic and its p value
    - durbin watson test

    It stores everything in a pandas DataFrame, where each piece of information above is assigned to one column and
    each row is a ticker.

    :param reg_dict: dictionary that contains the regressions
    :return: pandas DataFrame
    """

    lst_df = []
    lst_index = []

    for key in reg_dict.keys():

        lst_ = []

        lst_index.append(key)

        lst_.append(reg_dict[key].params['Constant'])  # constant parameter
        lst_.append(reg_dict[key].bse['Constant'])  # constant standard error
        lst_.append(reg_dict[key].pvalues['Constant'])  # constant p value

        lst_.append(
            reg_dict[key].params['Consumption'])  # consumption parameter
        lst_.append(
            reg_dict[key].bse['Consumption'])  # consumption standard error
        lst_.append(
            reg_dict[key].pvalues['Consumption'])  # consumption p value

        lst_.append(reg_dict[key].rsquared)  # r squared model
        lst_.append(reg_dict[key].rsquared_adj)  # adjusted r squared model
        lst_.append(reg_dict[key].fvalue)  # f statistic model
        lst_.append(reg_dict[key].f_pvalue)  # p value f statistic
        lst_.append(smt.durbin_watson(reg_dict[key].resid))  # durbin watson

        lst_df.append(lst_)

    lst_columns = [
        'coef_constante', 'std_err_constante', 'p_value_constante',
        'coef_consumption', 'std_err_consumption', 'p_value_consumption',
        'r_squared', 'r_squared_adj', 'f_stats', 'p_value_f_stats',
        'durb_watson'
    ]

    df_reg = pd.DataFrame(lst_df, columns=lst_columns, index=lst_index)

    return df_reg
Example no. 24
def check_autocorrelation(residual):
    s = durbin_watson(residual)
    if s <= 1.5:
        return s, 'there is positive correlation'
    if s >= 2.5:
        return s, 'there is negative correlation'
    if s < 2.5 and s >= 2.1:
        return s, 'there is a slight negative correlation'
    if s > 1.5 and s <= 1.9:
        return s, 'there is a slight positive correlation'
    else:
        return s, 'there is no correlation'
Example no. 25
 def dw_test(self, x, y):
     '''
     Compute the DW statistic and store it in the parameter dictionary.
     :param x: array-like, GDP
     :param y: array-like, per-capita household income
     :return: float, dw_value
     '''
     x1, const = self.para_dict["x1_coef"], self.para_dict["const_coef"]
     error = y - (x * x1 + const)
     dw = durbin_watson(error)
     self.para_dict["dw_value"] = dw
     return dw
Example no. 26
def residuals_properties(residuals, model='Model'):
    """
    Computes statistical values and displays plots to evaluate how the models fitted the training dataset. The residuals
    in a time series model are what is left over after fitting a model.

    :param model: string to identify the model. default_value='Model'
    :param residuals: residuals of the model.
    :return:
    """
    # Compute mean, median, skewness, kurtosis and durbin statistic
    mean_value = residuals.mean()
    median = np.median(residuals)
    # skewness = 0 : same weight in both the tails such as a normal distribution.
    skew = stats.skew(residuals)
    # Kurtosis is the degree of the peak of a distribution.
    # 3 it is normal, >3 higher peak, <3 lower peak
    kurtosis = stats.kurtosis(residuals)
    # Values between 0 and 2 indicate positive and values between 2 and 4 indicate negative auto-correlation.
    durbin = durbin_watson(residuals)
    # D'Agostino-Pearson normality test (stats.normaltest); null hypothesis: the sample follows the normal distribution
    normality_p = stats.normaltest(residuals)[1]
    print(
        f'{model} residuals information:\n - Mean: {mean_value:.4f} \n - Median: {median:.4f} \n - Skewness: '
        f'{skew:.4f} \n - Kurtosis: {kurtosis:.4f}\n - Durbin: {durbin:.4f}\n - Normality p-value: {normality_p:.4f}'
    )
    # Create plots
    sn.set()
    fig, axes = plt.subplots(1, 5, figsize=(25, 5.3))
    # Compute standardized residuals
    residuals = (residuals - np.nanmean(residuals)) / np.nanstd(residuals)
    # First picture: q-q plot
    # Keep only not NaN residuals.
    residuals_non_missing = residuals[~(np.isnan(residuals))]
    qqplot(residuals_non_missing, line='s', ax=axes[0])
    axes[0].set_title('Normal Q-Q')
    # Second picture: simple plot of standardized residuals
    x = np.arange(0, len(residuals), 1)
    sn.lineplot(x=x, y=residuals, ax=axes[1])
    axes[1].set_title('Standardized residual')
    # Third picture: comparison between residual and gaussian distribution
    kde = stats.gaussian_kde(residuals_non_missing)
    x_lim = (-1.96 * 2, 1.96 * 2)
    x = np.linspace(x_lim[0], x_lim[1])
    axes[2].plot(x, stats.norm.pdf(x), label='Normal (0,1)', lw=2)
    axes[2].plot(x, kde(x), label='Residuals', lw=2)
    axes[2].set_xlim(x_lim)
    axes[2].legend()
    axes[2].set_title('Estimated density')
    # Last pictures: residuals auto-correlation plots
    plot_acf(residuals, ax=axes[3], lags=30)
    plot_pacf(residuals, ax=axes[4], lags=30)
    fig.tight_layout()
    plt.show()
Example no. 27
def par_boot(func_solve, func_fit, m, p, p_error, t, mRNA, res_old):
    n = 1000
    chi2_vec = np.zeros(n)
    dw_vec = np.zeros(n)
    if func_solve == 'stationary':
        # get solution to fitted model
        y_model = [p[0] for _ in res_old]
        # carry out n bootstraps
        for idx in range(n):
            # resample
            y_boot = y_model + np.array([np.random.normal(0, ps) for ps in p_error])
            # fit
            r, p_out = fit_stationary(t, y_boot, p_error)
            chi2_vec[idx] = np.sum(r**2)
            dw_vec[idx] = durbin_watson(r)
    else:
        # get solution to fitted model
        y_model = func_solve(np.log10(p), t, mRNA)
        # carry out n bootstraps
        for idx in range(n):
            # resample
            y_boot = y_model + np.array([np.random.normal(0, ps) for ps in p_error])
            # fit
            r, p_out = fit(func_fit, m, t, y_boot, p_error, mRNA, samples=1, plot=False, p_old=p)
            chi2_vec[idx] = np.sum(r**2)
            dw_vec[idx] = durbin_watson(r)
    # plotting to check distribution
    plot = False
    if plot:
        plt.hist(chi2_vec, bins=int(np.sqrt(n)))
        plt.show()
    # get p-value for chi2 test
    chi2_old = np.sum(res_old**2)
    chi2_ecdf = ECDF(chi2_vec)
    chi2_p = 1 - chi2_ecdf(chi2_old)                    # right sided test for chi2
    # get p-value for dw-test
    dw_old = durbin_watson(res_old)
    dw_ecdf = ECDF(dw_vec)
    dw_p = dw_ecdf(dw_old)                              # left sided test durbin-watson
    return chi2_p, dw_p
Example no. 28
def generate_regression_dataframe(reg_dict):

    """

    Extrai do dicionario contendo as regressoes os parametros, desvios padroes, p valores, r quadrado,
    r quadrado ajustado, estatistica f, p valor da estatistica f e durbin watson.

    Armazena todas essas informacoes em um DataFrame, sendo cada linha respectiva a um ticker.

    :param reg_dict: Dicionario contendo as regressoes.
    :return: DataFrame contendo os parametros das regressoes nas colunas e os tickers nas linhas
    """

    # Retira as informacoes do dicionario com as regressoes
    # entrada e o dicionario contendo as regressoes
    # retorna um dataframe com as informacoes

    lst_df = []
    lst_index = []

    for key in reg_dict.keys():

        lst_ = []

        lst_index.append(key)


        lst_.append(reg_dict[key].params['constante'])  # constant parameter
        lst_.append(reg_dict[key].bse['constante'])  # constant standard error
        lst_.append(reg_dict[key].pvalues['constante'])  # constant p-value

        lst_.append(reg_dict[key].params['Consumo'])  # consumption parameter
        lst_.append(reg_dict[key].bse['Consumo'])  # consumption standard error
        lst_.append(reg_dict[key].pvalues['Consumo'])  # consumption p-value

        lst_.append(reg_dict[key].rsquared) # r squared model
        lst_.append(reg_dict[key].rsquared_adj) # adjusted r squared model
        lst_.append(reg_dict[key].fvalue) # f statistic model
        lst_.append(reg_dict[key].f_pvalue) # p value f statistic
        lst_.append(smt.durbin_watson(reg_dict[key].resid)) # durbin watson

        lst_df.append(lst_)

        del lst_

    lst_columns = ['coef_constante', 'std_err_constante', 'p_value_constante', 'coef_consumo', 'std_err_consumo',
                  'p_value_consumo', 'r_squared', 'r_squared_adj', 'f_stats', 'p_value_f_stats', 'durb_watson']


    df_reg = pd.DataFrame(lst_df, columns = lst_columns, index=lst_index)

    return df_reg
Example no. 29
def get_durbin_watson(errors, axis):  #must feed 1-d array
    '''
    A number which determines whether there is autocorrelation in the residuals of a time series regression. 
    The statistic ranges from 0 to 4 with 0 indicating positive autocorrelation and 4 indicating negative correlation. 
    A value of 2 indicates no auto correlation in the sample. The formula is expressed as:
    d = sum_{t=2..T} (e_t - e_{t-1})^2  /  sum_{t=1..T} e_t^2

    where e_t are the residuals from a regression.

    Read more: http://www.businessdictionary.com/definition/Durbin-Watson-Statistic.html
    '''
    db = durbin_watson(errors.dropna(), axis)
    print('Durbin-Watson test statistic:{}'.format(db))
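The docstring's formula can be checked directly against statsmodels; a sketch assuming numpy and pandas:
import numpy as np
import pandas as pd
from statsmodels.stats.stattools import durbin_watson

e = pd.Series(np.random.default_rng(3).standard_normal(50))
d_manual = np.sum(np.diff(e.values) ** 2) / np.sum(e.values ** 2)
assert np.isclose(d_manual, durbin_watson(e.dropna(), 0))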
Example no. 30
    def score_VAR_correlation(self, models, x_train, lag=0, maxlag=None):
        '''
        durbin_watson test:
        the closer the result is to 2, the less serial correlation; results near 0 or 4 imply correlation
        '''
        for i, (name_model, model) in enumerate(models.items()):
            if name_model == 'VAR':
                if maxlag is not None:  # hyperparameter search over the lag order
                    vet_aic = []
                    vet_bic = []
                    vet_fpe = []
                    vet_hqic = []
                    for i in range(maxlag):
                        result = model.fit(i)
                        vet_aic.append(result.aic)
                        vet_bic.append(result.bic)
                        vet_fpe.append(result.fpe)
                        vet_hqic.append(result.hqic)
                    df_results = pd.DataFrame()
                    df_results['AIC'] = vet_aic
                    df_results['BIC'] = vet_bic
                    df_results['FPE'] = vet_fpe
                    df_results['HQIC'] = vet_hqic
                    return df_results
                else:  # direct fit at a specific lag value
                    result = model.fit(lag)
                    out = durbin_watson(result.resid)
                    df_results = pd.DataFrame()
                    for col, val in zip(x_train.columns, out):
                        df_results[col] = [round(val, 2)]
                    return df_results.T

            elif name_model == 'VARMAX':
                result = model.fit()
                out = durbin_watson(result.resid)
                df_results = pd.DataFrame()
                for col, val in zip(x_train.columns, out):
                    df_results[col] = [round(val, 2)]
                return df_results.T
Example no. 31
    def check_residual_autocorrelation(self):
        """Check the residual autocorrelation in a
        regression analysis using the Durbin-Watson test.

        The closer the Durbin-Watson value is to 0, the
        more evidence for positive serial correlation.
        The closer to 4, the more evidence for negative
        serial correlation.
        """
        if self.fitted_result is None:
            raise DataWasNotFitted()
        self.durbin_watson_value = stattools.durbin_watson(
            self.fitted_result.resid)
Example no. 32
def regression_scores(timeseries, time_window_size, time_lag, reg, cv, scoring, timeseriesZ=None):
    """Compute regression scores for a given set of 3 timeseries
    according to the variable causality structures.
    """
    global causality_structures
    if scoring == 'residual_tests':
        features_regression = np.zeros([len(causality_structures),7])
    else:
        features_regression = np.zeros([len(causality_structures),2]) #added 2 dimensions to compute r2 and mse
    for j, (cs_train, cs_test) in enumerate(causality_structures):
        ts_train = timeseries[:,cs_train]
        if timeseriesZ is not None:
            ts_train = np.hstack([ts_train, timeseriesZ])
        
        if time_lag is None:
            time_lag=time_window_size
        
        ts_test = timeseries[:,cs_test]
        tmp_score = np.zeros([time_window_size,2]) #added 2 dimensions to compute r2 and mse
        residuals = np.zeros(timeseries.shape[0]-time_window_size)
        for i_reg in range(time_window_size):
            idx_example = np.arange(i_reg, timeseries.shape[0]-time_lag, time_window_size)
            X = np.zeros((idx_example.size, time_window_size, ts_train.shape[1]))
            for k in range(time_window_size):
                X[:,k] = ts_train[idx_example+k]
            
            X = X.reshape(X.shape[0], X.shape[1] * X.shape[2])
            y = ts_test[idx_example + time_lag]
            if scoring == 'residual_tests':
                y_pred_i_reg = np.zeros(y.size)
                kfold = KFold(n=y.size, n_folds=cv)
                for train, test in kfold:
                    reg.fit(X[train], y[train])
                    y_pred_i_reg[test] = reg.predict(X[test])
                
                residuals[idx_example] = y - y_pred_i_reg #residuals
            else:
                tmp_predict = cross_val_predict(reg, X, y, cv=cv)
                tmp_score[i_reg,0] = r2_score(y,tmp_predict).mean() 
                tmp_score[i_reg,1] = mean_squared_error(y,tmp_predict).mean()
                #tmp_score[i_reg] = cross_val_score(reg, X, y, cv=cv, scoring=scoring).mean()
        
        if scoring == 'residual_tests':
            features_regression[j,0] = durbin_watson(residuals)
            features_regression[j,[1,2]] = omni_normtest(residuals) 
            features_regression[j,3:] = jarque_bera(residuals)
        else:
            features_regression[j] = tmp_score.mean(0)

    return features_regression
Example no. 33
def fit(y, X, reg_names):
    nr = len(reg_names)
    
    try:
        mod = sm.GLSAR(y.values, X, 2, missing = 'drop') # MLR analysis with AR2 modeling
        res = mod.iterative_fit()
        output = xr.Dataset({'coef': (['reg_name'], res.params[1:]), \
                'conf_int': (['reg_name', 'limit'], res.conf_int()[1:,:]), \
                'p_value': (['reg_name'],  res.pvalues[1:]), \
                'DWT': (sms.durbin_watson(res.wresid)), \
                'CoD': (res.rsquared)}, \
                coords = {'reg_name': (['reg_name'], reg_names),\
                          'limit': (['limit'], ['lower', 'upper'])})
    except Exception:
        nans = np.full([nr], np.nan)
        output = xr.Dataset({'coef': (['reg_name'], nans), \
                'conf_int': (['reg_name', 'limit'], np.array([nans, nans]).T), \
                'p_value': (['reg_name'],  nans), \
                'DWT': (np.nan), \
                'CoD': (np.nan)}, \
                coords = {'reg_name': (['reg_name'], reg_names),\
                          'limit': (['limit'], ['lower', 'upper'])})

    return output
Example no. 34
def fitdata(f, Xdata,Ydata,Errdata, pguess, ax=False, ax2=False):
    '''
    popt = vector of length N of the optimized parameters
    pcov = Covariance matrix of the fit
    perr = vector of length N of the std-dev of the optimized parameters
    p95 = half width of the 95% confidence interval for each parameter 
    p_p = vector of length N of the p-value for the parameters being zero
    (if p<0.05, null hypothesis rejected and parameter is non-zero) 
    chisquared = chisquared value for the fit
    chisquared_red = chisquared/degfreedom
    chisquare = (p, chisquared, chisquared_red, degfreedom) 
    p = Probability of finding a chisquared value at least as extreme as the one shown
    chisquared_red = chisquared/degfreedom. value should be approx. 1 for a good fit. 
    R2 = correlation coefficient or proportion of explained variance 
    R2_adj = adjusted R2 taking into account number of predictors 
    resanal = (p, w, mean, stddev)
    Analysis of residuals 
    p = Probability of finding a w at least as extreme as the one observed (should be high for good fit) 
    w = Shapiro-Wilk test criterion 
    mean = mean of residuals 
    p_res = probability that the mean value obtained is different from zero merely by chance 
    F = F-statistic for the fit msm/msE. 
    Null hypothesis is that there is NO Difference between the two variances. 
    p_F = probability that this value of F can arise by chance alone.
    p_F < 0.05 to reject null hypothesis and prove that the fit is good.
    dw = Durbin_Watson statistic (value between 0 and 4).
    2 = no-autocorrelation. 0 = +ve autocorrelation, 4 = -ve autocorrelation.
'''
    
    def error(p,Xdata,Ydata,Errdata):
        Y=f(Xdata,p)
        residuals=(Y-Ydata)/Errdata
        return residuals
    res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata),full_output=1)
    (popt,pcov,infodict,errmsg,ier)=res
    perr=scipy.sqrt(scipy.diag(pcov))

    M=len(Ydata)
    N=len(popt)
    #Residuals
    Y=f(Xdata,popt)
    residuals=(Y-Ydata)/Errdata
    meanY=scipy.mean(Ydata)
    squares=(Y-meanY)/Errdata
    squaresT=(Ydata-meanY)/Errdata
    
    SSM=sum(squares**2) #Corrected Sum of Squares
    SSE=sum(residuals**2) #Sum of Squares of Errors
    SST=sum(squaresT**2)#Total Corrected sum of Squares
    
    DFM=N-1 #Degree of Freedom for model
    DFE=M-N #Degree of Freedom for error
    DFT=M-1 #Degree of freedom total
    
    MSM=SSM/DFM #Mean Squares for model(explained Variance)
    MSE=SSE/DFE #Mean Squares for Error(should be small wrt MSM) unexplained Variance
    MST=SST/DFT #Mean squares for total
    
    R2=SSM/SST #proportion of explained variance
    R2_adj= 1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
    
    #t-test to see if parameters are different from zero
    t_stat=popt/perr #t-stat for popt different from zero
    t_stat=t_stat.real
    p_p= 1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit
    z=scipy.stats.t(M-N).ppf(0.95)
    p95=perr*z
    #Chi-Squared Analysis on Residuals
    chisquared=sum(residuals**2)
    degfreedom=M-N
    chisquared_red=chisquared/degfreedom
    p_chi2=1.0-scipy.stats.chi2.cdf(chisquared, degfreedom)
    stderr_reg=scipy.sqrt(chisquared_red)
    chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
    
    #Analysis of Residuals
    w, p_shapiro=scipy.stats.shapiro(residuals)
    mean_res=scipy.mean(residuals)
    stddev_res=scipy.sqrt(scipy.var(residuals))
    t_res=mean_res/stddev_res #t-statistics
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
    
    F=MSM/MSE
    p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
    
    dw=stools.durbin_watson(residuals)
    resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
    
    if ax:
        formataxis(ax)
        ax.plot(Ydata,Y,'ro')
        ax.errorbar(Ydata,Y,yerr=Errdata, fmt='.')
        Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
        ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
        ax.xaxis.label.set_text('Data')
        ax.yaxis.label.set_text('Fitted')
        sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov)
        Yplus=Y+sigmay
        Yminus=Y-sigmay
        ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
        titletext='Parity plot for fit.\n'
        titletext+=r'$r^2$=%5.2f,$r^2_{adj}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson$=%2.1f'
        titletext+='\n F=%5.2f,$p_F$=%3.2e,'
        titletext+='$\sigma_{err}^{reg}$=%5.2f'

        ax.title.set_text(titletext%(R2, R2_adj, p_shapiro, dw, F, p_F, stderr_reg))
        ax.figure.canvas.draw()
    
    if ax2:
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext='Analysis of Residuals\n'
        titletext+=r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson$=%2.1f'
        titletext+='\n F=%5.2f,$p_F$=%3.2e'
        ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
    return popt,pcov,perr, p95, p_p,chisquare, resanal
Example no. 35
 def test_durbin_watson_3d(self):
     shape = (10, 1, 10)
     x = np.random.standard_normal(100)
     dw = sum(np.diff(x)**2.0) / np.dot(x, x)
     x = np.tile(x[None, :, None], shape)
     assert_almost_equal(np.squeeze(dw * np.ones(shape)), durbin_watson(x, axis=1))
Example no. 36
print(R2)
print(R2_adj)


chisquared=sum(residuals**2)
Dof=M-N
chisquared_red=chisquared/Dof
p_chi2=1-scipy.stats.chi2.cdf(chisquared,Dof)
stderr_reg=scipy.sqrt(chisquared_red)
chisquare=(p_chi2,chisquared,chisquared_red,Dof,R2,R2_adj)
print(chisquare)

w,p_shapiro=scipy.stats.shapiro(residuals)
mean_res=scipy.mean(residuals)
stddev_res=scipy.sqrt(scipy.var(residuals))
t_res=mean_res/stddev_res
p_res=1-scipy.stats.t.cdf(t_res,M-1)
print(p_res)
if p_res<0.05:
    print('Null hypothesis is rejected')


F=MSM/MSE
p_F=1-scipy.stats.f.cdf(F,DFM,DFE)
if p_F <0.05:
    print('Null hypothesis is rejected')

dw=stools.durbin_watson(residuals)

resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
Example no. 37
    def test_frame_timeseries_durbin_watson(self):
        """Test Durbin Watson"""
        result = self.frame.timeseries_durbin_watson_test("logM")
        db_result = smst.durbin_watson(self.pandaframe["logM"])

        self.assertAlmostEqual(result, db_result, delta=0.0000000001)
Example no. 38
def fitdata(f,Xdata,Ydata,Errdata,pguess,dict_data,ax=False,ax2=False):
    
    def error(p,Xdata,Ydata,Errdata,dict_data):
        Y=f(Xdata,p,dict_data)
        residuals=(Y-Ydata)/Errdata
        return residuals
    res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata,dict_data),full_output=1)
    (popt,pcov,infodict,errmsg,ier)=res
    perr=scipy.sqrt(scipy.diag(pcov))
    
    M=len(Ydata)
    N=len(popt)
    
    Y=f(Xdata,popt,dict_data)
    residuals=(Y-Ydata)/Errdata
    meanY=scipy.mean(Ydata)
    squares=(Y-meanY)/Errdata
    squaresT=(Ydata-meanY)/Errdata
    
    SSM=sum(squares**2)
    SSE=sum(residuals**2)
    SST=sum(squaresT**2)
    
    DFM=N-1
    DFE=M-N
    DFT=M-1
    
    MSM=SSM/DFM
    MSE=SSE/DFE
    MST=SST/DFT
    
    R2=SSM/SST
    R2_adj=1-(1-R2)*(M-1)/(M-N-1)
    
    t_stat=popt/perr
    t_stat=t_stat.real
    p_p=1.0-scipy.stats.t.cdf(t_stat,DFE)
    z=scipy.stats.t(M-N).ppf(0.95)
    p95=perr*z
    
    chisquared=sum(residuals**2)
    degfreedom=M-N
    chisquared_red=chisquared/degfreedom
    p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
    stderr_reg=scipy.sqrt(chisquared_red)  # needed for the parity-plot title below
    chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
    
    w,p_shapiro=scipy.stats.shapiro(residuals)
    mean_res=scipy.mean(residuals)
    stddev_res=scipy.sqrt(scipy.var(residuals))
    t_res=mean_res/stddev_res
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
    
    F=MSM/MSE
    p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
    
    dw=stools.durbin_watson(residuals)
    resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
    
    if ax:
        formataxis(ax)
        ax.plot(Ydata,Y,'ro')
        ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
        Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
        ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
        ax.xaxis.label.set_text('Data')
        ax.yaxis.label.set_text('Fitted')
        
        sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov,dict_data)
        Yplus=Y+sigmay
        Yminus=Y-sigmay
        ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
        titletext ='parity plot for fit.\n'
        titletext +=r'$r^2$=%5.2f,$r^2_{adj}$=%5.2f,'
        titletext +='$\sigma_{exp}$=%5.2f,$\chi^2_{\nu}$=%5.2f,$p_{\chi^2}$=%5.2f,'
        titletext +='$\sigma_{err}^{reg}$=%5.2f'
        
        ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
        ax.figure.canvas.draw()
        
    if ax2:
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext ='Analysis of Residuals\n'
        titletext +=r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$=%5.2f, $Durbin-Watson$=%2.1f'
        titletext +='\n F=%5.2f, $p_F$=%3.2e'
        ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
        
        ax2.figure.canvas.draw()
        
    return popt,pcov,perr,p95,p_p,chisquare,resanal
Example no. 39
def fitdata(f,Xdata,Ydata,Errdata,pguess,dict_data,ax=False,ax2=False):
 
     def error(p, Xdata, Ydata, Errdata, dict_data):
        Y=f(Xdata, p,dict_data)
        residuals= (Y-Ydata)/Errdata
        return residuals
     res = scipy.optimize.leastsq(error, pguess, args=(Xdata, Ydata, Errdata, dict_data), full_output=1)
     (popt, pcov, infodict, errmsg, ier) = res
     perr=scipy.sqrt(scipy.diag(pcov))
     M= len(Ydata)
     N=len(popt)
     ''' Residuals: '''
     Y=f(Xdata,popt,dict_data)
     residuals=(Y-Ydata)/Errdata
     meanY=scipy.mean(Ydata)
     squares=(Y-meanY)/Errdata
     squaresT=(Ydata-meanY)/Errdata
     
     print "Residuals:\n",residuals
    
     SSM=sum(squares**2) #Corrected Sum of Squares
     SSE=sum(residuals**2) #Sum of Squares of Errors
     SST=sum(squaresT**2) #Total corrected sum of squares
     ''' Degrees of Freedom: '''
     DFM=N-1 #Degrees of freedom for model
     DFE=M-N #Degrees of freedom for error
  #   DFT=M-1 #Degrees of freedom total
    
     MSM=SSM/DFM #Mean squares for model (explained variance)
     MSE=SSE/DFE #Mean squares for Error (should be small wrt MSM) Unexplained Variance
    # MST=SST/DFT #Mean squares for total
    
     R2=SSM/SST #proportion of explained variance
     R2_adj=1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
     ''' t-test : '''    
    #t-test to see if parameters are different from zero
     t_stat=popt/perr #t-statistic for popt different from zero'
     t_stat=t_stat.real
     p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit.
     z=scipy.stats.t(M-N).ppf(0.95)
     p95=perr*z
     ''' Chi squared Analysis on Residuals: '''
     chisquared=sum(residuals**2)
     degfreedom=M-N
     chisquared_red=chisquared/degfreedom
     p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
     stderr_reg=scipy.sqrt(chisquared_red)
     chisquare=(p_chi2,chisquared, chisquared_red, degfreedom,R2,R2_adj)
    
     ''' Residual Analysis: '''
     w,p_shapiro=scipy.stats.shapiro(residuals)
     mean_res=scipy.mean(residuals)
     stddev_res=scipy.sqrt(scipy.var(residuals))
     t_res=mean_res/stddev_res #t-statistic to test that mean_res is zero.
     p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
        #if p_res <0.05, null hypothesis rejected and mean is non-zero.
        #Should be high for good fit.
     
     ''' F-test on Residuals: '''
     F=MSM/MSE #explained variance/unexplained . Should be large
     p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
        #if p_F <0.05, null-hypothesis is rejected
        #i.e. R^2>0 and at least one of the fitting parameters >0.
     dw=stools.durbin_watson(residuals)
     resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
     if ax:
         formataxis(ax)
         ax.plot(Ydata,Y,'ro')
         ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
         Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
         ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
         ax.xaxis.label.set_text('Data')
         ax.yaxis.label.set_text('Fitted')
        
         sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov,dict_data)
         Yplus=Y+sigmay
         Yminus=Y-sigmay
         ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
         ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
         ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
         titletext='Parity Plot for Fitted Data\n'
         titletext+=r'$R^2$=%5.2f, $R^2_{adj}$=%5.2f, '
         titletext +='Exp. sigma=%5.2f, $\chi^2_{\nu}$=%5.2f, $p_{\chi^2}$=%5.2f, '
         titletext +='$\sigma_{err}^{reg}$=%5.2f'
         print("Standard deviation of Y:\n", sigmay)
         print("Positive deviation of Y:\n", Yplus)
         print("Negative deviation of Y:\n", Yminus)
                  
         ax.title.set_text(titletext%(R2,R2_adj, avg_stddev_data, chisquared_red,  p_chi2, stderr_reg))
         ax.figure.canvas.draw()
        
     if ax2:#Test for homoscedasticity
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext='Analysis of Residuals\n'
        titletext+=r'mean=%5.2f, $p_{res}$=%5.2f, $p_{shapiro}$=%5.2f, $Durbin-Watson$=%2.1f'
        titletext+='\nF=%5.2f, $p_F$=%3.2e'
        ax2.title.set_text(titletext%(mean_res, p_res, p_shapiro, dw , F, p_F))
        
        ax2.figure.canvas.draw()
        
     return popt,pcov,perr,p95,p_p,chisquare,resanal
Example no. 40
def fitdata(f,Xdata,Ydata,Errdata,pguess,dict_data,ax=False,ax2=False):
    '''
    fitdata(f,Xdata,Ydata,Errdata,pguess,dict_data)
        f=function f(X,p,dict_data)
        Xdata=array like object (k,M) shaped array for data with k predictors
        e.g. if X = (x1,x2,x3) then X=(X1,X2,X3) where X1 is a vector of x1 etc
        Ydata=array like object of length M
        Errdata=array like object of length M: error estimate of ydata.
        pguess=array like object of length N(vector of guess of parameters)
        dict_data= dictionary containing other data necessary for f
    Returns:
        popt=vector of length N of the optimized parameters
        pcov=covariance matrix of the fit
        perr=vector of length N of the std-dev of the optimized parameters
        p95=half width of the 95% confidence interval for each parameter i.e. popt-p95 and popt+p95
        p_p=vector of length N of the p-value for the parameters being zero
                (if p<0.05,null hypothesis rejected and parameter is non-zero)
        chisquared=(chisquared,chisquared_red,degfreedom,p)
        chisquared=chisquared value for the fit:sum of squared of weighted residuals
        chisquared_red=chisquared/degfreedom. Value should be approx. 1 for a good fit.
        degfreedom=M-N the degrees of freedom of the fitting
        chisquare=(p,chisquared,chisquared_red,degfreedom)
                p=Probability of finding a chisquared value at least as extreme as the one shown
                  purely by random chance(should be high for good fit)
                chisquared=chisquared value for the fit: sum of squares of weighted residuals
                chisquared_red=chisquared/degfreedom. Value should be  approx. 1 for a good fit.
                degfreedom=M-N the  degrees of freedom of the fitting
                R2=correlation coefficient or proportion of explained variance
                R2_adj=adjusted R2 taking into account number of predictors
        resanal=(p,w,mean,stddev) Analysis of residuals
                p=Probability of finding a w at least as extreme as the one observed (should be high for good fit)
                w=Shapiro-wilk test criterion
                mean=mean of residuals
                p_res=probability that the mean value obtained is different from zero merely by chance
                (should be low for good fit)
                The mean must be within 1 stddev of zero for highly significant fitting.
                F=F-statistic for the fit MSM/MSE
                Null hypothesis is that there is NO difference between the two variances.
                p_F=probability that this value of F can arise by chance alone.
                   p_F<0.05 to reject null hypothesis and prove that the fit is good
                dw=Durbin_Watson statistic (value between 0 and 4).
                   2=no-autocorrelation.  0=+ve autocorrelation, 4 = -ve autocorrelation.
    '''
    
    def error(p, Xdata, Ydata, Errdata, dict_data):
        Y=f(Xdata, p,dict_data)
        residuals= (Y-Ydata)/Errdata
        return residuals
    
    res = scipy.optimize.leastsq(error, pguess, args=(Xdata, Ydata, Errdata, dict_data), full_output=1)
    (popt, pcov, infodict, errmsg, ier) = res
    perr=scipy.sqrt(scipy.diag(pcov))
    
    M=len(Ydata)
    N=len(popt)
    #Residuals
    Y=f(Xdata,popt,dict_data)
    residuals=(Y-Ydata)/Errdata
    meanY=scipy.mean(Ydata)
    squares=(Y-meanY)/Errdata
    squaresT=(Ydata-meanY)/Errdata
    
    SSM=sum(squares**2) #Corrected Sum of Squares
    SSE=sum(residuals**2) #Sum of Squares of Errors
    SST=sum(squaresT**2) #Total corrected sum of squares
    
    DFM=N-1 #Degrees of freedom for model
    DFE=M-N #Degrees of freedom for error
    DFT=M-1 #Degrees of freedom total
    
    MSM=SSM/DFM #Mean squares for model (explained variance)
    MSE=SSE/DFE #Mean squares for Error (should be small wrt MSM) Unexplained Variance
    MST=SST/DFT #Mean squares for total
    
    R2=SSM/SST #proportion of explained variance
    R2_adj=1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
    
    #t-test to see if parameters are different from zero
    t_stat=popt/perr #t-statistic for popt different from zero'
    t_stat=t_stat.real
    p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit.
    z=scipy.stats.t(M-N).ppf(0.95)
    p95=perr*z
    #Chisquared Analysis on Residuals
    chisquared=sum(residuals**2)
    degfreedom=M-N
    chisquared_red=chisquared/degfreedom
    p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
    stderr_reg=scipy.sqrt(chisquared_red)
    chisquare=(p_chi2,chisquared, chisquared_red, degfreedom,R2,R2_adj)
    
    #Analysis of Residuals
    w,p_shapiro=scipy.stats.shapiro(residuals)
    mean_res=scipy.mean(residuals)
    stddev_res=scipy.sqrt(scipy.var(residuals))
    t_res=mean_res/stddev_res #t-statistic to test that mean_res is zero.
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
        #if p_res <0.05, null hypothesis rejected and mean is non-zero.
        #Should be high for good fit.
    #F-test on residuals
    F=MSM/MSE #explained variance/unexplained . Should be large
    p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
        #if p_F <0.05, null-hypothesis is rejected
        #i.e. R^2>0 and at least one of the fitting parameters >0.
    dw=stools.durbin_watson(residuals)
    resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
    
    
    
    if ax:
        formataxis(ax)
        ax.plot(Ydata,Y,'ro')
        ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
        Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
        ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
        ax.xaxis.label.set_text('Data')
        ax.yaxis.label.set_text('Fitted')
        
        sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov,dict_data)
        Yplus=Y+sigmay
        Yminus=Y-sigmay
        ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
        titletext='Parity plot for fit.\n'
        titletext+=r'$r^2$=%5.2f, $r^2_{adj}$=%5.2f, '
        titletext +='$\sigma_{exp}$=%5.2f,$\chi^2_{\nu}$=%5.2f, $p_{\chi^2}$=%5.2f, '
        titletext +='$\sigma_{err}^{reg}$=%5.2f'
        
        ax.title.set_text(titletext%(R2,R2_adj, avg_stddev_data, chisquared_red,  p_chi2, stderr_reg))
        ax.figure.canvas.draw()
        
    if ax2:#Test for homoscedasticity
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext='Analysis of Residuals\n'
        titletext+=r'mean=%5.2f, $p_{res}$=%5.2f, $p_{shapiro}$=%5.2f, $Durbin-Watson$=%2.1f'
        titletext+='\nF=%5.2f, $p_F$=%3.2e'
        ax2.title.set_text(titletext%(mean_res, p_res, p_shapiro, dw , F, p_F))
        
        ax2.figure.canvas.draw()
        
    return popt,pcov,perr,p95,p_p,chisquare,resanal
Example no. 41
def fitdata(f,XData,YData,ErrData,pguess,ax=False,ax2=False):
    
    def error(p,XData,YData,ErrData):
        Y=f(XData,p)
        residuals=(Y-YData)/ErrData
        return residuals
    res=scipy.optimize.leastsq(error,pguess,args=(XData,YData,ErrData),full_output=1)
    (popt,pcov,infodict,errmsg,ier)=res
    
    perr=scipy.sqrt(scipy.diag(pcov))
    
    M=len(YData)
    N=len(popt)
    #residuals
    Y=f(XData,popt)
    residuals=(Y-YData)/ErrData
    meanY=scipy.mean(YData)
    squares=(Y-meanY)/ErrData
    squaresT=(YData-meanY)/ErrData
    
    SSM=sum(squares**2) #Corrected Sum of Squares
    SSE=sum(residuals**2)#Sum of Squares of Errors
    SST=sum(squaresT**2)#Total corrected sum of squares
    
    DFM=N-1 #degrees of freedom for model
    DFE=M-N #degrees of freedom for error
    DFT=M-1 #degrees of freedom total
    
    MSM=SSM/DFM #Mean squares for model (explained variance)
    MSE=SSE/DFE #Mean squares for error
    MST=SST/DFT #Mean squares for total
    
    R2=SSM/SST #proportion of explained variance
    R2_adj=1-(1-R2)*(M-1)/(M-N-1)#Adjusted R2
    
    #t-test to see if parameters are different from zero
    t_stat=popt/perr #t-statistic for popt different from zero
    t_stat=t_stat.real
    p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit.
    z=scipy.stats.t(M-N).ppf(0.95)
    p95=perr*z
    
    #chisquared analysis on residuals
    chisquared=sum(residuals**2)
    degfreedom=M-N
    chisquared_red=chisquared/degfreedom
    p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
    stderr_reg=scipy.sqrt(chisquared_red)
    chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
    
    #Analysis of Residuals
    w,p_shapiro=scipy.stats.shapiro(residuals)
    mean_res=scipy.mean(residuals) #mean of all residuals
    stddev_res=scipy.sqrt(scipy.var(residuals)) #standard deviation of all residuals
    t_res=mean_res/stddev_res #t-statistic to test that mean_res is zero.
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
         #if p_res<0.05,null-hypothesis is rejected and mean is non-zero
         #should be high for good fit.
    # F-test on residuals
    F=MSM/MSE #explained/un-explained. Should be large
    p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
         #if p_F<0.05,null hypothesis is rejected
         #i.e. R^2>0 and at least one of the fitting parameters >0.
    dw=stools.durbin_watson(residuals)
    resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
    if ax:
        formataxis(ax)
        ax.plot(YData,Y,'ro')
        ax.errorbar(YData,Y,yerr=ErrData,fmt='.')
        Ymin,Ymax=min((min(Y),min(YData))), max((max(Y),max(YData)))
        ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
        ax.xaxis.label.set_text('Data')
        ax.yaxis.label.set_text('Fitted')
        
        sigmaY,avg_stddev_data=get_stderr_fit(f,XData,popt,pcov)
        Yplus=Y+sigmaY
        Yminus=Y-sigmaY
        ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
        titletext='Parity plot for fit.\n'
        titletext += r'$r^2$ = %5.2f, $r^2_{adj}$= %5.2f, '
        titletext += r'$\sigma_{exp}$ = %5.2f, $\chi^2_{\nu}$=%5.2f, $p_{\chi^2}$=%5.2f, '
        titletext += r'$\sigma_{err}^{reg}$ = %5.2f'
        
        ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
        ax.figure.canvas.draw()
        
    if ax2: #test for homoscedasticity
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext = 'Analysis of residuals\n'
        titletext +=r'mean= %5.2f, $p_{res}$= %5.2f, $p_{shapiro}$=%5.2f , $Durbin-watson$=%2.1f'
        titletext +='\n F=%5.2f,$p_F$ = %3.2e'
        ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
        
        ax2.figure.canvas.draw()
        
    return popt,pcov,perr,p95,p_p,chisquare,resanal
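
# A minimal usage sketch for the fitdata helper above (not part of the original
# source). The exponential model, noise level, and seed below are illustrative
# assumptions; fitdata itself additionally expects scipy, scipy.optimize,
# scipy.stats, and statsmodels.stats.stattools (as stools) to be imported, and
# its scipy.sqrt/scipy.mean calls require an older SciPy release.
import numpy as np

def f_demo(XData, p):
    # hypothetical model: y = p0 * exp(-p1 * x)
    return p[0] * np.exp(-p[1] * XData)

rng = np.random.default_rng(0)
XData = np.linspace(0.1, 5.0, 30)
YData = f_demo(XData, [2.0, 0.7]) + rng.normal(0.0, 0.05, XData.size)
ErrData = np.full_like(XData, 0.05)  # assumed known measurement error

popt, pcov, perr, p95, p_p, chisquare, resanal = fitdata(f_demo, XData, YData, ErrData, [1.0, 1.0])
p_chi2, chisquared_red, degfreedom, R2, R2_adj = chisquare
p_shapiro, w, mean_res, p_res, F, p_F, dw = resanal
print('R2=%5.2f, reduced chi2=%5.2f, Durbin-Watson=%2.1f' % (R2, chisquared_red, dw))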
Example no. 42
0
chisquare=sum((j**2)/((sigmai**2)*81))


sigmasq=sum(((u-ucalc)**2)/9)
sigmau=scipy.sqrt(sigmasq)

dumean2=(u-umean)**2  
ducalc2=(u-ucalc)**2

dsducalc2=sum(ducalc2)
dsdumean2=sum(dumean2)
r2=1-dsducalc2/dsdumean2

residual=(u-ucalc)/e_q #calculating the residuals
w,p_shapiro=scipy.stats.shapiro(residual)#shapiro wilk test
dw=stools.durbin_watson(residual)#durbin watson test

DFM=8
DFE=1


squares=(ucalc-umean)
squaresT=(u-umean)
residuals=(ucalc-u)

SSM=sum(squares**2)
SSE=sum(residuals**2)
SST=sum(squaresT**2)

MSM=SSM/DFM
MSE=SSE/DFE
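
# Hedged completion (not in the original fragment): with the hardcoded DFM=8
# and DFE=1 above, the F-test would follow the same pattern as the fuller
# fitdata examples in this collection.
import scipy.stats

F = MSM/MSE  # explained vs. unexplained variance; should be large for a good fit
p_F = 1.0 - scipy.stats.f.cdf(F, DFM, DFE)  # if p_F<0.05, R^2>0 is supported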
Example no. 43
0
sunspot_data = sm.datasets.sunspots.load_pandas().data
sunspot_data.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
del sunspot_data["YEAR"]

sunspot_data.plot(figsize=(12,8));

fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(sunspot_data.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(sunspot_data, lags=40, ax=ax2)

arma_mod20 = sm.tsa.ARMA(sunspot_data, (2,0)).fit()
arma_mod30 = sm.tsa.ARMA(sunspot_data, (3,0)).fit()

stattools.durbin_watson(arma_mod30.resid.values)

fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

resid = arma_mod30.resid

diag.normal_ad(resid)

fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
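
# Note (not from the original): sm.tsa.ARMA was deprecated and then removed in
# statsmodels 0.13, so the calls above only run on older releases. A sketch of
# the equivalent fit in current statsmodels, keeping the Durbin-Watson check:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.stattools import durbin_watson

arma_mod30_new = ARIMA(sunspot_data, order=(3, 0, 0)).fit()  # ARMA(3,0) as ARIMA(3,0,0)
print(durbin_watson(arma_mod30_new.resid.values))  # ~2 suggests no first-order autocorrelation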
Example no. 44
0
def fitdata(f, Xdata,Ydata,Errdata, pguess,dict_data, ax=False, ax2=False):
    def error(p,Xdata,Ydata,Errdata,dict_data):
        Y=f(Xdata,p,dict_data)
        residuals=(Y-Ydata)/Errdata
        return residuals
    
    res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata,dict_data),full_output=1)
    (popt,pcov,infodict,errmsg,ier)=res
    perr=scipy.sqrt(scipy.diag(pcov))

    M=len(Ydata)
    N=len(popt)
    #Residuals
    Y=f(Xdata,popt,dict_data)
    residuals=(Y-Ydata)/Errdata
    meanY=scipy.mean(Ydata)
    squares=(Y-meanY)/Errdata
    squaresT=(Ydata-meanY)/Errdata
    
    SSM=sum(squares**2) #Corrected Sum of Squares
    SSE=sum(residuals**2) #Sum of Squares of Errors
    SST=sum(squaresT**2)#Total Corrected sum of Squares
    
    DFM=N-1 #Degree of Freedom for model
    DFE=M-N #Degree of Freedom for error
    DFT=M-1 #Degree of freedom total
    
    MSM=SSM/DFM #Mean Squares for model(explained Variance)
    MSE=SSE/DFE #Mean Squares for Error(should be small wrt MSM) unexplained Variance
    MST=SST/DFT #Mean squares for total
    
    R2=SSM/SST #proportion of explained variance
    R2_adj= 1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
    
    #t-test to see if parameters are different from zero
    t_stat=popt/perr #t-stat for popt different from zero
    t_stat=t_stat.real
    p_p= 1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit
    z=scipy.stats.t(M-N).ppf(0.95)
    p95=perr*z
    #Chi-Squared Analysis on Residuals
    chisquared=sum(residuals**2)
    degfreedom=M-N
    chisquared_red=chisquared/degfreedom
    p_chi2=1.0-scipy.stats.chi2.cdf(chisquared, degfreedom)
    stderr_reg=scipy.sqrt(chisquared_red)
    chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
    
    #Analysis of Residuals
    w, p_shapiro=scipy.stats.shapiro(residuals)
    mean_res=scipy.mean(residuals)
    stddev_res=scipy.sqrt(scipy.var(residuals))
    t_res=mean_res/stddev_res #t-statistics
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
    
    F=MSM/MSE
    p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
    
    dw=stools.durbin_watson(residuals)
    resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
    
    if ax:
        formataxis(ax)
        ax.plot(Ydata,Y,'ro')
        ax.errorbar(Ydata,Y,yerr=Errdata, fmt='.')
        Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
        ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
        ax.xaxis.label.set_text('Data')
        ax.yaxis.label.set_text('Fitted')
        sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov,dict_data)
        Yplus=Y+sigmay
        Yminus=Y-sigmay
        ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
        ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
        titletext='Parity plot for fit.\n'
        titletext+=r'$r^2$=%5.2f, $r^2_{adj}$=%5.2f, '
        titletext+=r'$\sigma_{exp}$=%5.2f, $\chi^2_{\nu}$=%5.2f, $p_{\chi^2}$=%5.2f, '
        titletext+=r'$\sigma_{err}^{reg}$=%5.2f'
        
        ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
        ax.figure.canvas.draw()
    
    if ax2:
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext='Analysis of Residuals\n'
        titletext+=r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson$=%2.1f'
        titletext+='\n F=%5.2f,$p_F$=%3.2e'
        ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
        ax2.figure.canvas.draw()
        
    return popt,pcov,perr, p95, p_p,chisquare, resanal
Example no. 45
0
def fitdata(f,Xdata,Ydata,Errdata,pguess,ax=False,ax2=False):

    def error(p,Xdata,Ydata,Errdata):
        Y=f(Xdata,p)
        residuals=(Y-Ydata)/Errdata
        return residuals
    res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata),full_output=1)    
    (popt,pcov,infodict,errmsg,ier)=res
    perr=scipy.sqrt(scipy.diag(pcov))
    M=len(Ydata)
    N=len(popt)
    Y=f(Xdata,popt)
    residuals=(Y-Ydata)/Errdata
    meanY=scipy.mean(Ydata)
    squares=(Y-meanY)/Errdata
    squaresT=(Ydata-meanY)/Errdata
    
    SSM=sum(squares**2)
    SSE=sum(residuals**2)
    SST=sum(squaresT**2)
    
    DFM=N-1
    DFE=M-N
    DFT=M-1
    
    MSM=SSM/DFM
    MSE=SSE/DFE
    MST=SST/DFT
    
    '''R2'''
    R2=SSM/SST
    R2_adj=1-(1-R2)*(M-1)/(M-N-1)
    
    '''t-test'''
    t_stat=popt/perr
    t_stat=t_stat.real
    p_p=1.0-scipy.stats.t.cdf(t_stat,DFE)
    z=scipy.stats.t(M-N).ppf(0.95)
    p95=perr*z
    
    '''chi-square'''
    chisqred=sum(residuals**2)
    degfrdm=M-N
    chisqred_red=chisqred/degfrdm
    p_chi2=1.0-scipy.stats.chi2.cdf(chisqred,degfrdm)
    stderr_reg=scipy.sqrt(chisqred_red)
    chisqre=(p_chi2,chisqred,chisqred_red,degfrdm,R2,R2_adj)
    
    '''shapiro-wilk test'''
    w,p_shapiro=scipy.stats.shapiro(residuals)
    mean_res=scipy.mean(residuals)
    stddev_res=scipy.sqrt(scipy.var(residuals))
    t_res=mean_res/stddev_res
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
    
    '''F-test'''
    F=MSM/MSE
    p_F=1-scipy.stats.f.cdf(F,DFM,DFE)
    
    '''durbin-watson'''
    dw=stools.durbin_watson(residuals)
    
    resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)

    return popt,pcov,perr,p95,p_p,chisqre,resanal,R2,chisqred,w,dw
Example no. 46
0
    def summary(self, yname=None, xname=None, title=None, alpha=.05):
        """Summarize the Regression Results

        Parameters
        ----------
        yname : string, optional
            Default is `y`
        xname : list of strings, optional
            Default is `var_##` for ## in p the number of regressors
        title : string, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary
            results

        """

        #TODO: import where we need it (for now), add as cached attributes
        from statsmodels.stats.stattools import (jarque_bera,
                omni_normtest, durbin_watson)
        jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
        omni, omnipv = omni_normtest(self.wresid)

        eigvals = self.eigenvals
        condno = self.condition_number

        self.diagn = dict(jb=jb, jbpv=jbpv, skew=skew, kurtosis=kurtosis,
                          omni=omni, omnipv=omnipv, condno=condno,
                          mineigval=eigvals[0])

        top_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Method:', ['Least Squares']),
                    ('Date:', None),
                    ('Time:', None)
                    ]

        top_right = [('Pseudo R-squared:', ["%#8.4g" % self.prsquared]),
                     ('Bandwidth:', ["%#8.4g" % self.bandwidth]),
                     ('Sparsity:', ["%#8.4g" % self.sparsity]),
                     ('No. Observations:', None),
                     ('Df Residuals:', None), #[self.df_resid]), #TODO: spelling
                     ('Df Model:', None) #[self.df_model])
                    ]

        diagn_left = [('Omnibus:', ["%#6.3f" % omni]),
                      ('Prob(Omnibus):', ["%#6.3f" % omnipv]),
                      ('Skew:', ["%#6.3f" % skew]),
                      ('Kurtosis:', ["%#6.3f" % kurtosis])
                      ]

        diagn_right = [('Durbin-Watson:', ["%#8.3f" % durbin_watson(self.wresid)]),
                       ('Jarque-Bera (JB):', ["%#8.3f" % jb]),
                       ('Prob(JB):', ["%#8.3g" % jbpv]),
                       ('Cond. No.', ["%#8.3g" % condno])
                       ]


        if title is None:
            title = self.model.__class__.__name__ + ' ' + "Regression Results"

        #create summary table instance
        from statsmodels.iolib.summary import Summary
        smry = Summary()
        smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                          yname=yname, xname=xname, title=title)
        smry.add_table_params(self, yname=yname, xname=xname, alpha=.05,
                             use_t=True)

#        smry.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
                          #yname=yname, xname=xname,
                          #title="")

        #add warnings/notes, added to text format only
        etext = []
        if eigvals[-1] < 1e-10:
            wstr = "The smallest eigenvalue is %6.3g. This might indicate "
            wstr += "that there are\n"
            wstr += "strong multicollinearity problems or that the design "
            wstr += "matrix is singular."
            wstr = wstr % eigvals[-1]
            etext.append(wstr)
        elif condno > 1000:  #TODO: what is recommended
            wstr = "The condition number is large, %6.3g. This might "
            wstr += "indicate that there are\n"
            wstr += "strong multicollinearity or other numerical "
            wstr += "problems."
            wstr = wstr % condno
            etext.append(wstr)

        if etext:
            smry.add_extra_txt(etext)

        return smry
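
# For context (a hedged, self-contained sketch, not part of the summary code
# above): the same Durbin-Watson diagnostic appears in the bottom table of an
# ordinary OLS summary; the data here are synthetic.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x_demo = rng.normal(size=100)
y_demo = 1.0 + 2.0*x_demo + rng.normal(size=100)
ols_res = sm.OLS(y_demo, sm.add_constant(x_demo)).fit()
print(ols_res.summary())  # bottom table: Omnibus, Durbin-Watson, Jarque-Bera, Cond. No.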
Example no. 47
0
        'F_Statistic': results.fvalue,
        'F_Statistic_P_Value': results.f_pvalue,
        'Log_Likelihood': results.llf,
        'AIC': results.aic,
        'BIC': results.bic,
        'Number_Of_Observations': int(results.nobs),
        'Degrees_Of_Freedom_Model': int(results.df_model),
        'Degrees_Of_Freedom_Residual': int(results.df_resid)
    },
    'Parameters': params,
    'Diagnostics': {
        'Omnibus': results.diagn['omni'],
        'Omnibus_P_Value': results.diagn['omnipv'],
        'Skew': results.diagn['skew'],
        'Kurtosis': results.diagn['kurtosis'],
        'Durbin_Watson': durbin_watson(results.wresid),
        'Jarque_Bera': results.diagn['jb'],
        'Jarque_Bera_P_Value': results.diagn['jbpv'],
        'Condition_Number': results.diagn['condno']
    }
}

print(json.dumps(resultsObject, sort_keys=True))


#                             OLS Regression Results                            
# ==============================================================================
# Dep. Variable:                   var1   R-squared:                       0.734
# Model:                            OLS   Adj. R-squared:                  0.706
# Method:                 Least Squares   F-statistic:                     26.21
# Date:                Sun, 05 Jul 2015   Prob (F-statistic):           3.45e-06
Example no. 48
0
 def test_durbin_watson_2d(self, reset_randomstate):
     shape = (1, 10)
     x = np.random.standard_normal(100)
     dw = sum(np.diff(x)**2.0) / np.dot(x, x)
     x = np.tile(x[:, None], shape)
     assert_almost_equal(np.squeeze(dw * np.ones(shape)), durbin_watson(x))
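
# For reference: the statistic checked here is the textbook Durbin-Watson
# d = sum_{t=2..T} (e_t - e_{t-1})^2 / sum_{t=1..T} e_t^2, computed above as
# sum(np.diff(x)**2.0) / np.dot(x, x). Values near 2 indicate no first-order
# autocorrelation; d < 2 points to positive, d > 2 to negative correlation.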
Example no. 49
0
def fitdata(f,Xdata,Ydata,Errdata,pguess,ax= False,ax2= False):
    
    #calculating the popt 
    
    def error(pguess,Xdata,Ydata,Errdata):
        Y=f(Xdata,pguess)
        residuals= (Y-Ydata)/Errdata
        return (residuals)
    
    res= scipy.optimize.leastsq(error, pguess,args=(Xdata,Ydata,Errdata),full_output=1)
      
    (popt,pcov,infodict,errmsg,ier)=res
      
    perr= scipy.sqrt(scipy.diag(pcov))
    
    M= len(Xdata)
    N= len(popt)
   #residuals
    Y= f(Xdata,popt)
    residuals=(Y-Ydata)/Errdata
    meanY= scipy.mean(Ydata)
    squares= (Y-meanY)/Errdata
    squaresT= (Ydata-meanY)/Errdata
    
    SSM= sum(squares**2)#corrected sum of squares
    SSE= sum(residuals**2)#sum of squares of errors
    SST= sum(squaresT**2)#total corrected sum of squrare

    DFM= N-1
    DFE= M-N
    DFT= M-1

    MSM= SSM/DFM
    MSE= SSE/DFE
    MST= SST/DFT

    R2= SSM/SST    #proportion of explained variance
    R2_adj= 1-(1-R2)*(M-1)/(M-N-1)#Adjusted R2 

    # t test to see if parameters are different from 0
    t_stat= popt/perr
    t_stat= t_stat.real
    p_p= 1.0-scipy.stats.t.cdf(t_stat,DFE)
    z=scipy.stats.t(M-N).ppf(0.95)
    p95= perr*z

    #chisquared analysis on residuals
    chisquared= sum(residuals**2)
    degfreedom= M-N
    chisquared_red= chisquared/degfreedom
    p_chi2= 1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)       
    stderr_reg= scipy.sqrt(chisquared_red)
    chisquare=(p_chi2, chisquared,chisquared_red,degfreedom,R2,R2_adj)
    
    #analysis of residuals
    w,p_shapiro= scipy.stats.shapiro(residuals)
    mean_res= scipy.mean(residuals)
    stddev_res= scipy.sqrt(scipy.var(residuals))
    t_res= mean_res/stddev_res 
    p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
    #if p<0.05, null hypothesis is rejected and mean is non-zero
    #should be high for a good fit
    
    #F-test on the residuals
    F= MSM/MSE #explained variance/ unexplained should be large
    p_F= 1.0-scipy.stats.f.cdf(F,DFM,DFE)
    #if p_F<0.05, null hypothesis is rejected
    dw= stools.durbin_watson(residuals)
    resanal= (p_shapiro,w,mean_res,p_res,F,p_F,dw)
    
    if ax:
        formataxis(ax)
        ax.plot(Ydata,Y,'ro')
        ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
        Ymin, Ymax= min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
        ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
        
        ax.xaxis.label.set_text('Data')
        ax.yaxis.label.set_text('Fitted')
        
        sigmaY, avg_stddev_data= get_stderr_fit(f,Xdata,popt,pcov)
        Yplus= Y+sigmaY
        Yminus= Y-sigmaY
        ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth= 0.5)
        ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth= 0.5)
        ax.fill_between(Y,Yminus,Yplus,facecolor= 'cyan',alpha=0.5)
        titletext='Parity plot for fit.\n'
        titletext+= r'$r^2$=%5.2f,$r^2_{adj}$=%5.2f,'
        titletext+= r'$\sigma_{exp}$=%5.2f,$\chi^2_{\nu}$= %5.2f,$p_{\chi^2}$=%5.2f,'
        titletext+= r'$\sigma_{err}^{reg}$=%5.2f'
        
        ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
        ax.figure.canvas.draw()
        
    if ax2:  #test for homoscedasticity
        formataxis(ax2)
        ax2.plot(Y,residuals,'ro')
        
        ax2.xaxis.label.set_text('Fitted Data')
        ax2.yaxis.label.set_text('Residuals')
        
        titletext= 'Analysis of Residuals\n'
        titletext+= r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$= %5.2f, $Durbin-Watson$=%2.1f'
        titletext+= '\n F= %5.2f, $p_F$=%3.2e'
        ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw, F, p_F))
        
        ax2.figure.canvas.draw()
        
    return popt, pcov,perr,p95,p_p,chisquare,resanal
Example no. 50
0
def fitdata(f,Xdata,Ydata,Errdata,pguess,ax=False,ax2=False):
	'''
	fitdata(f,Xdata,Ydata,Errdata,pguess):
	'''
	def error(p,Xdata,Ydata,Errdata):
		Y=f(Xdata,p)
		residuals=(Y-Ydata)/Errdata
		return residuals
	res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata),full_output=1)
	(popt,pcov,infodict,errmsg,ier)=res   #optimize p
	perr=scipy.sqrt(scipy.diag(pcov))    #vector of sd of p
	M=len(Ydata)
	N=len(popt)
	#Residuals
	Y=f(Xdata,popt)
	residuals=(Y-Ydata)/Errdata
	meanY=scipy.mean(Ydata)
	squares=(Y-meanY)/Errdata
	squaresT=(Ydata-meanY)/Errdata
	
	SSM=sum(squares**2) #corrected sum of squares
	SSE=sum(residuals**2)  #sum of squares of errors
	SST=sum(squaresT**2)  #total corrected sum of squares
	
	DFM=N-1   #for model
	DFE=M-N   #for error
	DFT=M-1   #total
	
	MSM=SSM/DFM #mean squares for model(explained variance)
	MSE=SSE/DFE #mean squares for errors(should be small wrt MSM) unexplained variance
	MST=SST/DFT #mean squares for total	
	
	R2=SSM/SST  #proportion of explained variance
	R2_adj=1-(1-R2)*(M-1)/(M-N-1) #adjusted R2
	
	#ttest to see if parameters are different from zero
	t_stat=popt/perr #tstatistic for popt different from zero
	t_stat=t_stat.real
	p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit
	z=scipy.stats.t(M-N).ppf(0.95)
	p95=perr*z
	#Chisquared ananlysis on residuals
	chisquared=sum(residuals**2)
	degfreedom=M-N
	chisquared_red=chisquared/degfreedom
	p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
	stderr_reg=scipy.sqrt(chisquared_red)
	chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
	
	#Analysis of residuals
	w,p_shapiro=scipy.stats.shapiro(residuals) # to check if residuals are normally distributed
	mean_res=scipy.mean(residuals)
	stddev_res=scipy.sqrt(scipy.var(residuals))
	t_res=mean_res/stddev_res
	p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
	#if p_res<0.05, the null hypothesis is rejected and the mean is non-zero
	#F-test on residuals
	F=MSM/MSE
	p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
	#if p_F<0.05, the null hypothesis is rejected,
	#i.e. R^2>0 and at least one of the fitting parameters is >0
	
	dw=stools.durbin_watson(residuals) #to check if they are correlated
	resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
	
	if ax:
		formataxis(ax)
		ax.plot(Ydata,Y,'ro')
		ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
		Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
		ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
		
		ax.xaxis.label.set_text('Data')
		ax.yaxis.label.set_text('Fitted')
		sigmay,avg_stddev_data=get_stderr_fit(f, Xdata, popt, pcov)
		Yplus=Y+sigmay
		Yminus=Y-sigmay
		ax.plot(Y,Yplus,'c',alpha=.6,linestyle='--',linewidth=.5)
		ax.plot(Y,Yminus,'c',alpha=.6,linestyle='--',linewidth=.5)
		ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=.5)
		titletext='Parity plot for fit.\n'
		titletext+=r'$r^2$=%5.2f,$r^2_{adj}$=%5.2f, '
		titletext+=r'$\sigma_{exp}$=%5.2f,$\chi^2_{\nu}$=%5.2f,$p_{\chi^2}$=%5.2f, '
		titletext+=r'$\sigma_{err}^{reg}$=%5.2f'
		ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
		ax.figure.canvas.draw()
	
	if ax2:#test for homoscedasticity
		formataxis(ax2)
		ax2.plot(Y,residuals,'ro')
		
		ax2.xaxis.label.set_text('Fitted data')
		ax2.yaxis.label.set_text('Residuals')
		
		titletext='Analysis of residuals\n'
		titletext+=r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson$=%2.1f'
		titletext+='\n F=%5.2f,$p_F$=%3.2e'
		ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
		
		ax2.figure.canvas.draw()
		
	return popt,pcov,perr,p95,p_p,chisquare,resanal
Example no. 51
0
def test_durbin_watson_pandas():
    x = np.random.randn(50)
    x_series = pd.Series(x)
    assert_almost_equal(durbin_watson(x), durbin_watson(x_series), decimal=13)
Example no. 52
0
 def test_durbin_watson(self):
     x = np.random.standard_normal(100)
     dw = sum(np.diff(x)**2.0) / np.dot(x, x)
     assert_almost_equal(dw, durbin_watson(x))