Python load_pandasの例、statsmodels.datasets.longley.load_pandas Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_formula.py プロジェクト: Tskatom/Embers_VT

def test_formula_predict():
    """Check that in-sample predictions reproduce the fitted values.

    Fits the Longley model through the formula interface and compares
    ``results.fittedvalues`` with ``results.predict`` evaluated on the
    ``exog`` frame of the dataset.
    """
    # `log` has to be present in this namespace for the formula's
    # ``log(...)`` terms to be resolved, even though it looks unused.
    from numpy import log  # noqa: F401
    formula = """TOTEMP ~ log(GNPDEFL) + log(GNP) + UNEMP + ARMED +
                    POP + YEAR"""
    data = load_pandas()
    dta = load_pandas().data
    results = ols(formula, dta).fit()
    # Compare the underlying float arrays to 8 decimals: exact equality on
    # floating-point results is brittle, and comparing a labelled Series
    # against the prediction mixes index handling into a numeric check.
    npt.assert_almost_equal(results.fittedvalues.values,
                            results.predict(data.exog), 8)

コード例 #2

0

ファイルを表示

ファイル: test_formula.py プロジェクト: zhisheng/statsmodels

def test_formula_predict():
    """Fitted values of the Longley formula model must match ``predict``.

    The model is fit on the ``data`` frame of one dataset load and the
    prediction is evaluated on the ``exog`` frame of another load; both
    arrays have to agree to 8 decimal places.
    """
    # Not referenced directly: the ``log(...)`` terms in the formula need
    # the name to be available in this namespace.
    from numpy import log
    dataset = load_pandas()
    frame = load_pandas().data
    model_spec = """TOTEMP ~ log(GNPDEFL) + log(GNP) + UNEMP + ARMED +
                    POP + YEAR"""
    fit = ols(model_spec, frame).fit()
    in_sample = fit.fittedvalues.values
    predicted = fit.predict(dataset.exog)
    npt.assert_almost_equal(in_sample, predicted, 8)

コード例 #3

0

ファイルを表示

ファイル: test_formula.py プロジェクト: bashtage/statsmodels

def test_formula_predict():
    """Verify ``predict`` on the dataset's ``exog`` equals the fitted values.

    Uses the Longley data with a formula containing ``log`` transforms and
    checks agreement to 8 decimal places.
    """
    # patsy has to find `log` in this namespace when it evaluates the
    # formula, hence the otherwise unused import.
    from numpy import log  # noqa:F401

    dataset = load_pandas()
    results = ols(
        """TOTEMP ~ log(GNPDEFL) + log(GNP) + UNEMP + ARMED +
                    POP + YEAR""",
        load_pandas().data,
    ).fit()
    npt.assert_almost_equal(
        results.fittedvalues.values, results.predict(dataset.exog), 8
    )

コード例 #4

0

ファイルを表示

ファイル: test_formula.py プロジェクト: RDKCH/statsmodels

def test_formula_predict():
    """Round-trip check: predicting on the training regressors.

    After fitting the Longley formula model, ``predict`` applied to the
    dataset's ``exog`` frame must reproduce ``fittedvalues`` to 8 decimals.
    """
    # Imported only so that patsy can resolve `log` from this namespace.
    from numpy import log  # noqa:F401
    longley_formula_with_logs = """TOTEMP ~ log(GNPDEFL) + log(GNP) + UNEMP + ARMED +
                    POP + YEAR"""
    full_dataset, model_frame = load_pandas(), load_pandas().data
    fitted_model = ols(longley_formula_with_logs, model_frame).fit()
    expected_values = fitted_model.fittedvalues.values
    npt.assert_almost_equal(expected_values,
                            fitted_model.predict(full_dataset.exog),
                            8)

コード例 #5

0

ファイルを表示

ファイル: test_tools.py プロジェクト: nsolcampbell/statsmodels

def test_pandas_const_df_prepend():
    """Check that ``add_constant(..., prepend=True)`` puts ``const`` first.

    The ``UNEMP`` column is rescaled by its standard deviation before the
    constant is added (regression test for #1025). The first column must
    be named ``const`` and must have zero variance.
    """
    dta = longley.load_pandas().exog
    # regression test for #1025
    dta["UNEMP"] /= dta["UNEMP"].std()
    dta = tools.add_constant(dta, prepend=True)
    assert_string_equal("const", dta.columns[0])
    # The variances come back labelled by column name, so pick the first
    # one by position explicitly; a bare integer key on a label-indexed
    # Series relies on a positional fallback that pandas has deprecated.
    assert_equal(dta.var(0).iloc[0], 0)

コード例 #6

0

ファイルを表示

ファイル: test_tools.py プロジェクト: yarikoptic/pystatsmodels

def test_pandas_const_df_prepend():
    """Check that a prepended constant ends up as the first column.

    ``UNEMP`` is divided by its standard deviation first (regression test
    for #1025); afterwards the leading column must be called ``const`` and
    have zero variance.
    """
    dta = longley.load_pandas().exog
    # regression test for #1025
    dta['UNEMP'] /= dta['UNEMP'].std()
    dta = tools.add_constant(dta, prepend=True)
    assert_string_equal('const', dta.columns[0])
    # Select by position with .iloc: integer keys on a Series indexed by
    # column names depend on a positional fallback pandas has deprecated.
    assert_equal(dta.var(0).iloc[0], 0)

コード例 #7

0

ファイルを表示

ファイル: test_regression.py プロジェクト: statsmodels/statsmodels

def test_summary_as_latex():
    """Compare the LaTeX rendering of an OLS summary with a stored table.

    Covers GH#734. The Longley regressors get an extra ``constant`` column,
    the model is fit, and the summary is rendered with ``as_latex`` while a
    ``UserWarning`` is expected. The date and time cells are overwritten
    with fixed values so the output can be compared verbatim.
    """
    # GH#734
    import re

    longley_data = longley.load_pandas()
    exog = longley_data.exog
    exog["constant"] = 1
    endog = longley_data.endog
    fitted = OLS(endog, exog).fit()
    with pytest.warns(UserWarning):
        table = fitted.summary().as_latex()

    # replace the date and time
    fixed_cells = (
        ("(?<=\n\\\\textbf\\{Date:\\}             &).+?&",
         " Sun, 07 Apr 2013 &"),
        ("(?<=\n\\\\textbf\\{Time:\\}             &).+?&",
         "     13:46:07     &"),
    )
    for pattern, replacement in fixed_cells:
        table = re.sub(pattern, replacement, table)

    expected = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:}    &      TOTEMP      & \\textbf{  R-squared:         } &     0.995   \\\\
\\textbf{Model:}            &       OLS        & \\textbf{  Adj. R-squared:    } &     0.992   \\\\
\\textbf{Method:}           &  Least Squares   & \\textbf{  F-statistic:       } &     330.3   \\\\
\\textbf{Date:}             & Sun, 07 Apr 2013 & \\textbf{  Prob (F-statistic):} &  4.98e-10   \\\\
\\textbf{Time:}             &     13:46:07     & \\textbf{  Log-Likelihood:    } &   -109.62   \\\\
\\textbf{No. Observations:} &          16      & \\textbf{  AIC:               } &     233.2   \\\\
\\textbf{Df Residuals:}     &           9      & \\textbf{  BIC:               } &     238.6   \\\\
\\textbf{Df Model:}         &           6      & \\textbf{                     } &             \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lcccccc}
                  & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]}  \\\\
\\midrule
\\textbf{GNPDEFL}  &      15.0619  &       84.915     &     0.177  &         0.863        &     -177.029    &      207.153     \\\\
\\textbf{GNP}      &      -0.0358  &        0.033     &    -1.070  &         0.313        &       -0.112    &        0.040     \\\\
\\textbf{UNEMP}    &      -2.0202  &        0.488     &    -4.136  &         0.003        &       -3.125    &       -0.915     \\\\
\\textbf{ARMED}    &      -1.0332  &        0.214     &    -4.822  &         0.001        &       -1.518    &       -0.549     \\\\
\\textbf{POP}      &      -0.0511  &        0.226     &    -0.226  &         0.826        &       -0.563    &        0.460     \\\\
\\textbf{YEAR}     &    1829.1515  &      455.478     &     4.016  &         0.003        &      798.788    &     2859.515     \\\\
\\textbf{constant} &   -3.482e+06  &      8.9e+05     &    -3.911  &         0.004        &     -5.5e+06    &    -1.47e+06     \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:}       &  0.749 & \\textbf{  Durbin-Watson:     } &    2.559  \\\\
\\textbf{Prob(Omnibus):} &  0.688 & \\textbf{  Jarque-Bera (JB):  } &    0.684  \\\\
\\textbf{Skew:}          &  0.420 & \\textbf{  Prob(JB):          } &    0.710  \\\\
\\textbf{Kurtosis:}      &  2.434 & \\textbf{  Cond. No.          } & 4.86e+09  \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}

Warnings: \\newline
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline
 [2] The condition number is large, 4.86e+09. This might indicate that there are \\newline
 strong multicollinearity or other numerical problems."""
    assert_equal(table, expected)

コード例 #8

0

ファイルを表示

def test_summary_as_latex():
    """Pin the exact LaTeX produced by ``summary().as_latex()`` (GH#734).

    An OLS model is fit on the Longley regressors plus a ``constant``
    column. Rendering is expected to raise a ``UserWarning``. Because the
    summary embeds the current date and time, those two cells are replaced
    with fixed text before the comparison.
    """
    # GH#734
    import re

    dataset = longley.load_pandas()
    regressors = dataset.exog
    regressors["constant"] = 1
    response = dataset.endog
    ols_fit = OLS(response, regressors).fit()
    with pytest.warns(UserWarning):
        latex = ols_fit.summary().as_latex()

    # replace the date and time
    date_cell = "(?<=\n\\\\textbf\\{Date:\\}             &).+?&"
    time_cell = "(?<=\n\\\\textbf\\{Time:\\}             &).+?&"
    latex = re.sub(date_cell, " Sun, 07 Apr 2013 &", latex)
    latex = re.sub(time_cell, "     13:46:07     &", latex)

    reference = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:}    &      TOTEMP      & \\textbf{  R-squared:         } &     0.995   \\\\
\\textbf{Model:}            &       OLS        & \\textbf{  Adj. R-squared:    } &     0.992   \\\\
\\textbf{Method:}           &  Least Squares   & \\textbf{  F-statistic:       } &     330.3   \\\\
\\textbf{Date:}             & Sun, 07 Apr 2013 & \\textbf{  Prob (F-statistic):} &  4.98e-10   \\\\
\\textbf{Time:}             &     13:46:07     & \\textbf{  Log-Likelihood:    } &   -109.62   \\\\
\\textbf{No. Observations:} &          16      & \\textbf{  AIC:               } &     233.2   \\\\
\\textbf{Df Residuals:}     &           9      & \\textbf{  BIC:               } &     238.6   \\\\
\\textbf{Df Model:}         &           6      & \\textbf{                     } &             \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lcccccc}
                  & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]}  \\\\
\\midrule
\\textbf{GNPDEFL}  &      15.0619  &       84.915     &     0.177  &         0.863        &     -177.029    &      207.153     \\\\
\\textbf{GNP}      &      -0.0358  &        0.033     &    -1.070  &         0.313        &       -0.112    &        0.040     \\\\
\\textbf{UNEMP}    &      -2.0202  &        0.488     &    -4.136  &         0.003        &       -3.125    &       -0.915     \\\\
\\textbf{ARMED}    &      -1.0332  &        0.214     &    -4.822  &         0.001        &       -1.518    &       -0.549     \\\\
\\textbf{POP}      &      -0.0511  &        0.226     &    -0.226  &         0.826        &       -0.563    &        0.460     \\\\
\\textbf{YEAR}     &    1829.1515  &      455.478     &     4.016  &         0.003        &      798.788    &     2859.515     \\\\
\\textbf{constant} &   -3.482e+06  &      8.9e+05     &    -3.911  &         0.004        &     -5.5e+06    &    -1.47e+06     \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:}       &  0.749 & \\textbf{  Durbin-Watson:     } &    2.559  \\\\
\\textbf{Prob(Omnibus):} &  0.688 & \\textbf{  Jarque-Bera (JB):  } &    0.684  \\\\
\\textbf{Skew:}          &  0.420 & \\textbf{  Prob(JB):          } &    0.710  \\\\
\\textbf{Kurtosis:}      &  2.434 & \\textbf{  Cond. No.          } & 4.86e+09  \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}

Warnings: \\newline
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline
 [2] The condition number is large, 4.86e+09. This might indicate that there are \\newline
 strong multicollinearity or other numerical problems."""
    assert_equal(latex, reference)

コード例 #9

0

ファイルを表示

ファイル: test_formula.py プロジェクト: RDKCH/statsmodels

def test_tests():
    """Check the matrices built from a string of linear hypotheses.

    Fits the Longley formula model, turns three comma-separated
    constraints into matrices with ``make_hypotheses_matrices``, and
    compares the ``coefs`` and ``constants`` attributes with the expected
    values.
    """
    longley_frame = load_pandas().data
    fit = ols('TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR',
              longley_frame).fit()
    constraints = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
    restrictions = make_hypotheses_matrices(fit, constraints)
    coefs, constants = restrictions.coefs, restrictions.constants
    expected_coefs = [
        [0, 1, -1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1. / 1829],
    ]
    npt.assert_almost_equal(coefs, expected_coefs, 8)
    npt.assert_array_equal(constants, [[0], [2], [1]])

コード例 #10

0

ファイルを表示

ファイル: test_formula.py プロジェクト: Tskatom/Embers_VT

def test_tests():
    """Verify ``make_hypotheses_matrices`` on three string constraints.

    The constraints are evaluated against an OLS fit of the Longley
    formula; the resulting coefficient rows and constants are compared
    with hard-coded expectations.
    """
    model_formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
    fitted = ols(model_formula, load_pandas().data).fit()
    matrices = make_hypotheses_matrices(
        fitted, '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)')
    coef_rows, rhs = matrices.coefs, matrices.constants

    # One row per constraint, in the order they appear in the string.
    first_row = [0, 1, -1, 0, 0, 0, 0]
    second_row = [0, 0, 0, 1, 0, 0, 0]
    third_row = [0, 0, 0, 0, 0, 0, 1./1829]
    npt.assert_almost_equal(coef_rows, [first_row, second_row, third_row], 8)
    npt.assert_array_equal(rhs, [[0], [2], [1]])

コード例 #11

0

ファイルを表示

def test_summary():
    """Compare the LaTeX rendering of an OLS summary with a stored table.

    Regression test for issue 734. The Longley regressors get an extra
    ``constant`` column, the model is fit and rendered with ``as_latex``
    while warnings are captured, and the date and time cells are replaced
    with fixed text so the output can be compared verbatim.
    """
    # test 734
    import re
    dta = longley.load_pandas()
    X = dta.exog
    X["constant"] = 1
    y = dta.endog
    with warnings.catch_warnings(record=True):
        res = OLS(y, X).fit()
        table = res.summary().as_latex()
    # replace the date and time
    # The braces are escaped as ``\\{`` / ``\\}``: a lone ``\{`` in a
    # non-raw string is an invalid escape sequence. The pattern text that
    # reaches ``re`` is unchanged.
    table = re.sub("(?<=\n\\\\textbf\\{Date:\\}             &).+?&",
                   " Sun, 07 Apr 2013 &", table)
    table = re.sub("(?<=\n\\\\textbf\\{Time:\\}             &).+?&",
                   "     13:46:07     &", table)

    expected = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:}    &      TOTEMP      & \\textbf{  R-squared:         } &     0.995   \\\\
\\textbf{Model:}            &       OLS        & \\textbf{  Adj. R-squared:    } &     0.992   \\\\
\\textbf{Method:}           &  Least Squares   & \\textbf{  F-statistic:       } &     330.3   \\\\
\\textbf{Date:}             & Sun, 07 Apr 2013 & \\textbf{  Prob (F-statistic):} &  4.98e-10   \\\\
\\textbf{Time:}             &     13:46:07     & \\textbf{  Log-Likelihood:    } &   -109.62   \\\\
\\textbf{No. Observations:} &          16      & \\textbf{  AIC:               } &     233.2   \\\\
\\textbf{Df Residuals:}     &           9      & \\textbf{  BIC:               } &     238.6   \\\\
\\textbf{Df Model:}         &           6      & \\textbf{                     } &             \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lccccc}
                  & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$>$$|$t$|$} & \\textbf{[95.0\\% Conf. Int.]}  \\\\
\\midrule
\\textbf{GNPDEFL}  &      15.0619  &       84.915     &     0.177  &         0.863        &      -177.029   207.153       \\\\
\\textbf{GNP}      &      -0.0358  &        0.033     &    -1.070  &         0.313        &        -0.112     0.040       \\\\
\\textbf{UNEMP}    &      -2.0202  &        0.488     &    -4.136  &         0.003        &        -3.125    -0.915       \\\\
\\textbf{ARMED}    &      -1.0332  &        0.214     &    -4.822  &         0.001        &        -1.518    -0.549       \\\\
\\textbf{POP}      &      -0.0511  &        0.226     &    -0.226  &         0.826        &        -0.563     0.460       \\\\
\\textbf{YEAR}     &    1829.1515  &      455.478     &     4.016  &         0.003        &       798.788  2859.515       \\\\
\\textbf{constant} &   -3.482e+06  &      8.9e+05     &    -3.911  &         0.004        &      -5.5e+06 -1.47e+06       \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:}       &  0.749 & \\textbf{  Durbin-Watson:     } &    2.559  \\\\
\\textbf{Prob(Omnibus):} &  0.688 & \\textbf{  Jarque-Bera (JB):  } &    0.684  \\\\
\\textbf{Skew:}          &  0.420 & \\textbf{  Prob(JB):          } &    0.710  \\\\
\\textbf{Kurtosis:}      &  2.434 & \\textbf{  Cond. No.          } & 4.86e+09  \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}"""
    assert_equal(table, expected)

コード例 #12

0

ファイルを表示

ファイル: test_regression.py プロジェクト: Honglang/statsmodels

def test_summary():
    """Pin the exact LaTeX produced by ``summary().as_latex()`` (issue 734).

    An OLS model is fit on the Longley regressors plus a ``constant``
    column with warnings captured. Because the summary embeds the current
    date and time, those two cells are overwritten with fixed text before
    the result is compared with the stored table.
    """
    # test 734
    import re
    dta = longley.load_pandas()
    X = dta.exog
    X["constant"] = 1
    y = dta.endog
    with warnings.catch_warnings(record=True):
        res = OLS(y, X).fit()
        table = res.summary().as_latex()
    # replace the date and time
    # ``\\{`` / ``\\}`` replace the former ``\{`` / ``\}``, which are
    # invalid escape sequences in a non-raw string; the resulting pattern
    # is character-for-character the same.
    table = re.sub("(?<=\n\\\\textbf\\{Date:\\}             &).+?&",
                   " Sun, 07 Apr 2013 &", table)
    table = re.sub("(?<=\n\\\\textbf\\{Time:\\}             &).+?&",
                   "     13:46:07     &", table)

    expected = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:}    &      TOTEMP      & \\textbf{  R-squared:         } &     0.995   \\\\
\\textbf{Model:}            &       OLS        & \\textbf{  Adj. R-squared:    } &     0.992   \\\\
\\textbf{Method:}           &  Least Squares   & \\textbf{  F-statistic:       } &     330.3   \\\\
\\textbf{Date:}             & Sun, 07 Apr 2013 & \\textbf{  Prob (F-statistic):} &  4.98e-10   \\\\
\\textbf{Time:}             &     13:46:07     & \\textbf{  Log-Likelihood:    } &   -109.62   \\\\
\\textbf{No. Observations:} &          16      & \\textbf{  AIC:               } &     233.2   \\\\
\\textbf{Df Residuals:}     &           9      & \\textbf{  BIC:               } &     238.6   \\\\
\\textbf{Df Model:}         &           6      & \\textbf{                     } &             \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lccccc}
                  & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$>$$|$t$|$} & \\textbf{[95.0\\% Conf. Int.]}  \\\\
\\midrule
\\textbf{GNPDEFL}  &      15.0619  &       84.915     &     0.177  &         0.863        &      -177.029   207.153       \\\\
\\textbf{GNP}      &      -0.0358  &        0.033     &    -1.070  &         0.313        &        -0.112     0.040       \\\\
\\textbf{UNEMP}    &      -2.0202  &        0.488     &    -4.136  &         0.003        &        -3.125    -0.915       \\\\
\\textbf{ARMED}    &      -1.0332  &        0.214     &    -4.822  &         0.001        &        -1.518    -0.549       \\\\
\\textbf{POP}      &      -0.0511  &        0.226     &    -0.226  &         0.826        &        -0.563     0.460       \\\\
\\textbf{YEAR}     &    1829.1515  &      455.478     &     4.016  &         0.003        &       798.788  2859.515       \\\\
\\textbf{constant} &   -3.482e+06  &      8.9e+05     &    -3.911  &         0.004        &      -5.5e+06 -1.47e+06       \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:}       &  0.749 & \\textbf{  Durbin-Watson:     } &    2.559  \\\\
\\textbf{Prob(Omnibus):} &  0.688 & \\textbf{  Jarque-Bera (JB):  } &    0.684  \\\\
\\textbf{Skew:}          &  0.420 & \\textbf{  Prob(JB):          } &    0.710  \\\\
\\textbf{Kurtosis:}      &  2.434 & \\textbf{  Cond. No.          } & 4.86e+09  \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}"""
    assert_equal(table, expected)

コード例 #13

0

ファイルを表示

ファイル: test_tools.py プロジェクト: nsolcampbell/statsmodels

def test_pandas_const_series_prepend():
    """Check ``add_constant(..., prepend=True)`` on a single column.

    The ``GNP`` column of the Longley regressors is passed on its own; the
    result must have ``const`` as its first column, with zero variance.
    """
    dta = longley.load_pandas()
    series = dta.exog["GNP"]
    series = tools.add_constant(series, prepend=True)
    assert_string_equal("const", series.columns[0])
    # The variances are labelled by column name; use .iloc for positional
    # access instead of the deprecated integer-key fallback.
    assert_equal(series.var(0).iloc[0], 0)

コード例 #14

0

ファイルを表示

ファイル: test_formula.py プロジェクト: RDKCH/statsmodels

 def setup_class(cls):
     """Build the class-level OLS model from a plain dict of lists.

     Every value yielded from the Longley data is converted with
     ``tolist()`` before the dict is handed to ``ols`` together with
     ``longley_formula``; the parent ``setup_class`` then runs.
     """
     pairs = iteritems(load_pandas().data)
     as_lists = {name: column.tolist() for name, column in pairs}
     cls.model = ols(longley_formula, as_lists)
     super(TestFormulaDict, cls).setup_class()

コード例 #15

0

ファイルを表示

ファイル: ols.py プロジェクト: BranYang/statsmodels

# Fit OLS on `y` and `X`, both defined earlier in the script (outside this
# excerpt).
res3 = sm.OLS(y, X).fit()


# F-test using `R`, which is defined outside this excerpt
# (presumably a restriction matrix; confirm against the full script).
print(res3.f_test(R))


# F-test expressed as a string constraint on the coefficients named
# x2 and x3.
print(res3.f_test("x2 = x3 = 0"))


# ### Multicollinearity
#
# The Longley dataset is well known to have high multicollinearity. That is,
# the exogenous predictors are highly correlated. This is problematic because
# it can affect the stability of our coefficient estimates as we make minor
# changes to model specification.

from statsmodels.datasets.longley import load_pandas
# `y` and `X` are rebound here; each comes from its own load_pandas() call.
y = load_pandas().endog
X = load_pandas().exog
# Add a constant column to the regressors.
X = sm.add_constant(X)


# Fit and summary:

ols_model = sm.OLS(y, X)
ols_results = ols_model.fit()
print(ols_results.summary())


# #### Condition number
#
# One way to assess multicollinearity is to compute the condition number.
# Values over 20 are worrisome (see Greene 4.9). The first step is to
# normalize the independent variables to have unit length:

コード例 #16

0

ファイルを表示

ファイル: test_tools.py プロジェクト: yarikoptic/pystatsmodels

def test_pandas_const_df():
    """Check that ``add_constant(..., prepend=False)`` appends ``const``.

    After adding the constant to the Longley regressors, the last column
    must be named ``const`` and have zero variance.
    """
    dta = longley.load_pandas().exog
    dta = tools.add_constant(dta, prepend=False)
    assert_string_equal('const', dta.columns[-1])
    # Positional access via .iloc: a bare -1 key on a Series indexed by
    # column names relies on a fallback that pandas has deprecated.
    assert_equal(dta.var(0).iloc[-1], 0)

コード例 #17

0

ファイルを表示

ファイル: test_tools.py プロジェクト: dengemann/statsmodels

def test_pandas_const_df_prepend():
    """Check that ``add_constant(..., prepend=True)`` puts ``const`` first.

    After adding the constant to the Longley regressors, the first column
    must be named ``const`` and have zero variance.
    """
    dta = longley.load_pandas().exog
    dta = tools.add_constant(dta, prepend=True)
    assert_string_equal('const', dta.columns[0])
    # Use .iloc for positional access; integer keys on a label-indexed
    # Series depend on a positional fallback pandas has deprecated.
    assert_equal(dta.var(0).iloc[0], 0)

コード例 #18

0

ファイルを表示

ファイル: test_formula.py プロジェクト: RDKCH/statsmodels

 def setup_class(cls):
     """Build the class-level OLS model from ``load_pandas().data``.

     The model is created from ``longley_formula`` and stored on the class
     before the parent ``setup_class`` runs.
     """
     longley_frame = load_pandas().data
     cls.model = ols(longley_formula, longley_frame)
     super(TestFormulaPandas, cls).setup_class()

コード例 #19

0

ファイルを表示

ファイル: test_tools.py プロジェクト: yarikoptic/pystatsmodels

def test_pandas_const_series_prepend():
    """Check a prepended constant when the input is a single column.

    ``GNP`` from the Longley regressors is passed to ``add_constant`` with
    ``prepend=True``; the first column of the result must be ``const`` and
    have zero variance.
    """
    dta = longley.load_pandas()
    series = dta.exog['GNP']
    series = tools.add_constant(series, prepend=True)
    assert_string_equal('const', series.columns[0])
    # Select the first variance by position with .iloc rather than with a
    # bare integer key, which pandas treats as a deprecated fallback.
    assert_equal(series.var(0).iloc[0], 0)

コード例 #20

0

ファイルを表示

ファイル: test_tools.py プロジェクト: bfcondon/statsmodels

def test_pandas_const_series():
    """Check an appended constant when the input is a single column.

    ``GNP`` from the Longley regressors is passed to ``add_constant`` with
    ``prepend=False``; the second column of the result must be ``const``
    and have zero variance.
    """
    dta = longley.load_pandas()
    series = dta.exog['GNP']
    series = tools.add_constant(series, prepend=False)
    assert_string_equal('const', series.columns[1])
    # .iloc makes the positional lookup explicit; integer keys on a Series
    # indexed by column names use a fallback pandas has deprecated.
    assert_equal(series.var(0).iloc[1], 0)

コード例 #21

0

ファイルを表示

ファイル: test_formula.py プロジェクト: Tskatom/Embers_VT

 def setupClass(cls):
     """Create the class-level OLS model from ``load_pandas().data``.

     ``longley_formula`` and the loaded data are passed to ``ols``; the
     parent ``setupClass`` is invoked afterwards.
     """
     source_frame = load_pandas().data
     cls.model = ols(longley_formula, source_frame)
     super(TestFormulaPandas, cls).setupClass()

コード例 #22

0

ファイルを表示

plt.show()


# In[155]:


# Add a `pct_change` column computed from each frame's closing-price
# column. The first row of each result has no predecessor, which is
# presumably why the next cell calls dropna().
file_tcs['pct_change'] = file_tcs['Close Price'].pct_change()
file_nifty['pct_change'] = file_nifty['Close'].pct_change()


# In[158]:


# NOTE(review): the two assignments below are overwritten two lines later
# by the Longley data, so the regression is fit on Longley rather than on
# the TCS/Nifty returns. Confirm which data set was intended.
y = file_nifty['pct_change'].dropna()
x = file_tcs['pct_change'].dropna()
y = load_pandas().endog
x = load_pandas().exog
# Add a constant column to the regressors before fitting.
x = sm.add_constant(x)
myModel = sm.OLS(y, x).fit()
# Bare expression: its value is only displayed when run as a notebook
# cell; in a plain script the summary is discarded.
myModel.summary()


# In[160]:


# Load both price histories with the `Date` column parsed and used as the
# index.
tcs = pd.read_csv('TCS.csv', parse_dates=True, index_col='Date',)
nifty50 = pd.read_csv('Nifty50.csv', parse_dates=True, index_col='Date')


# In[162]:

コード例 #23

0

ファイルを表示

ファイル: ols_example.py プロジェクト: DoosanJung/tf_prob_study

    ols_dummy_vars_small = OLSExample()
    ols_dummy_vars_small.set_config(x_start=0,
                                    x_stop=20,
                                    n_samples=50,
                                    beta=[1.0, 0.3, -0.0, 10],
                                    dummy_slices=[20, 40])
    (y, X) = ols_dummy_vars_small.make_dummy_vars()
    res_ols_dummy_small = ols_dummy_vars_small.fit_data(y, X)
    print(res_ols_dummy_small.f_test(R))
    print(res_ols_dummy_small.f_test("x2 = x3 = 0"))
    """
    Multicollinearity: the exogenous predictors are highly correlated.
    This is problematic because it can affect the stability of our coefficient estimates 
    as we make minor changes to model specification.
    """
    y_multicol = load_pandas().endog
    X_multicol = load_pandas().exog
    X_multicol = sm.add_constant(X_multicol)
    print("X: ", X_multicol)

    res_ols_multicollinearity = ols.fit_data(y_multicol, X_multicol)
    print("Parameters     :", res_ols_multicollinearity.params)
    print("Standard errors: ", res_ols_multicollinearity.bse)
    print("R^2            :", res_ols_multicollinearity.rsquared)
    """
    condition number: to assess multicollinearity
    - Values over 20 are worrisome (see Greene 4.9)
    """
    #The first step is to normalize the independent variables to have unit length
    norm_x = X_multicol.values
    for i, name in enumerate(X_multicol):

コード例 #24

0

ファイルを表示

ファイル: ols.py プロジェクト: gongyg1/stat-model1

# Build a response from `X` (defined outside this excerpt) using the
# coefficients in `beta`, then add normally distributed noise with one
# draw per sample (`nsample` is also defined outside this excerpt).
beta = [1., 0.3, -0.0, 10]
y_true = np.dot(X, beta)
y = y_true + np.random.normal(size=nsample)

res3 = sm.OLS(y, X).fit()

# F-test using `R`, which is defined outside this excerpt
# (presumably a restriction matrix; confirm against the full script).
print(res3.f_test(R))

# F-test expressed as a string constraint on the coefficients named
# x2 and x3.
print(res3.f_test("x2 = x3 = 0"))

# ### Multicollinearity
#
# The Longley dataset is well known to have high multicollinearity. That is,
# the exogenous predictors are highly correlated. This is problematic because
# it can affect the stability of our coefficient estimates as we make minor
# changes to model specification.

from statsmodels.datasets.longley import load_pandas
# `y` and `X` are rebound here; each comes from its own load_pandas() call.
y = load_pandas().endog
X = load_pandas().exog
# Add a constant column to the regressors.
X = sm.add_constant(X)

# Fit and summary:

ols_model = sm.OLS(y, X)
ols_results = ols_model.fit()
print(ols_results.summary())

# #### Condition number
#
# One way to assess multicollinearity is to compute the condition number.
# Values over 20 are worrisome (see Greene 4.9). The first step is to
# normalize the independent variables to have unit length:

for i, name in enumerate(X):
    if name == "const":

コード例 #25

0

ファイルを表示

ファイル: test_formula.py プロジェクト: Tskatom/Embers_VT

 def setupClass(cls):
     """Build the class-level OLS model from a plain dict of lists.

     Each value of the Longley data is converted with ``tolist()`` so the
     formula interface is exercised with a dict instead of the pandas
     object; the parent ``setupClass`` runs afterwards.
     """
     # ``DataFrame.iteritems()`` no longer exists in pandas 2.0 and later;
     # ``items()`` yields the same (name, column) pairs.
     data = {k: v.tolist() for k, v in load_pandas().data.items()}
     cls.model = ols(longley_formula, data)
     super(TestFormulaDict, cls).setupClass()

コード例 #26

0

ファイルを表示

ファイル: test_tools.py プロジェクト: nsolcampbell/statsmodels

def test_pandas_const_df():
    """Check that an appended constant ends up as the last column.

    ``add_constant`` is called with ``prepend=False`` on the Longley
    regressors; the final column must be named ``const`` and have zero
    variance.
    """
    dta = longley.load_pandas().exog
    dta = tools.add_constant(dta, prepend=False)
    assert_string_equal("const", dta.columns[-1])
    # The variances are labelled by column name, so take the last one by
    # position with .iloc instead of a bare integer key.
    assert_equal(dta.var(0).iloc[-1], 0)

コード例 #27

0

ファイルを表示

ファイル: ftest.py プロジェクト: Fernal73/LearnPython3

# Drop the first row of `A` (defined earlier, outside this excerpt).
A = A[1:, :]
print(A)
# Per the original author: this tests that the coefficients are jointly
# statistically significantly different from zero.
print(results.f_test(A))

# Values recorded by the original author for the two prints below.
print(results.fvalue)
# 330.2853392346658
print(results.f_pvalue)
# 4.98403096572e-10
B = np.array(([0, 0, 1, -1, 0, 0, 0], [0, 0, 0, 0, 0, 1, -1]))
# This tests that the coefficients on the 2nd and 3rd regressors are equal
# and, jointly, that the coefficients on the 5th and 6th regressors are equal.
print(results.f_test(B))

# Second part: the same kind of test written as formula-style strings.
# `results` is rebound below to a formula-based fit on the Longley data.
from statsmodels.datasets import longley
from statsmodels.formula.api import ols
dta = longley.load_pandas().data
print(dta.describe())
formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
results = ols(formula, dta).fit()
# All three constraints tested jointly.
hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
print(hypotheses)
f_test = results.f_test(hypotheses)
print(f_test)
# Only the first two constraints.
hypotheses = '(GNPDEFL = GNP), (UNEMP = 2)'
print(hypotheses)
f_test = results.f_test(hypotheses)
print(f_test)
# The first and third constraints.
hypotheses = '(GNPDEFL = GNP),(YEAR/1829 = 1)'
print(hypotheses)
f_test = results.f_test(hypotheses)
print(f_test)

コード例 #28

0

ファイルを表示

ファイル: test_tools.py プロジェクト: timgates42/statsmodels

def test_pandas_const_series():
    """Check ``add_constant(..., prepend=False)`` on a single column.

    The ``GNP`` column of the Longley regressors is passed on its own; the
    result must have ``const`` as its second column, with zero variance.
    """
    dta = longley.load_pandas()
    series = dta.exog["GNP"]
    series = tools.add_constant(series, prepend=False)
    assert_string_equal("const", series.columns[1])
    # Explicit positional access via .iloc; a bare integer key on a Series
    # indexed by column names uses a fallback pandas has deprecated.
    assert_equal(series.var(0).iloc[1], 0)