# Descriptive figures (pie chart, horizontal bar chart) for the marriage
# ratings in the 'affairs' data set.
import wooldridge as woo
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

affairs = woo.dataWoo('affairs')

# attach labels (see previous script):
affairs['ratemarr'] = affairs['ratemarr'] - 1
affairs['haskids'] = pd.Categorical.from_codes(affairs['kids'],
                                               categories=['no', 'yes'])
mlab = ['very unhappy', 'unhappy', 'average', 'happy', 'very happy']
affairs['marriage'] = pd.Categorical.from_codes(affairs['ratemarr'],
                                                categories=mlab)

# counts for all graphs:
# value_counts() orders its result by frequency, while the charts below attach
# the labels in mlab purely by position. Reindexing to mlab puts every count
# back next to its own label.
counts = affairs['marriage'].value_counts().reindex(mlab)
counts_bykids = affairs['marriage'].groupby(affairs['haskids']).value_counts()
counts_yes = counts_bykids['yes'].reindex(mlab)
counts_no = counts_bykids['no'].reindex(mlab)

# pie chart (a):
grey_colors = ['0.3', '0.4', '0.5', '0.6', '0.7']
plt.pie(counts, labels=mlab, colors=grey_colors)
plt.savefig('PyGraphs/Descr-Pie.pdf')
plt.close()

# horizontal bar chart (b):
y_pos = [0, 1, 2, 3, 4]  # the y locations for the bars
plt.barh(y_pos, counts, color='0.6')
plt.yticks(y_pos, mlab, rotation=60)  # add and adjust labeling
# Log-wage regression on education, experience and tenure (wage1 data).
import wooldridge as woo
import numpy as np  # np.log is evaluated inside the model formula
import statsmodels.formula.api as smf

wage1 = woo.dataWoo('wage1')

# specify the model, then estimate it by OLS:
wage_formula = 'np.log(wage) ~ educ + exper + tenure'
reg = smf.ols(formula=wage_formula, data=wage1)
results = reg.fit()

# full regression output:
summary_table = results.summary()
print(f'results.summary(): \n{summary_table}\n')
import wooldridge as woo import pandas as pd import linearmodels.iv as iv jtrain = woo.dataWoo('jtrain') # define panel data (for 1987 and 1988 only): jtrain_87_88 = jtrain.loc[(jtrain['year'] == 1987) | (jtrain['year'] == 1988), :] jtrain_87_88 = jtrain_87_88.set_index(['fcode', 'year']) # manual computation of deviations of entity means: jtrain_87_88['lscrap_diff1'] = \ jtrain_87_88.sort_values(['fcode', 'year']).groupby('fcode')['lscrap'].diff() jtrain_87_88['hrsemp_diff1'] = \ jtrain_87_88.sort_values(['fcode', 'year']).groupby('fcode')['hrsemp'].diff() jtrain_87_88['grant_diff1'] = \ jtrain_87_88.sort_values(['fcode', 'year']).groupby('fcode')['grant'].diff() # IV regression: reg_iv = iv.IV2SLS.from_formula( formula='lscrap_diff1 ~ 1 + [hrsemp_diff1 ~ grant_diff1]', data=jtrain_87_88) results_iv = reg_iv.fit(cov_type='unadjusted', debiased=True) # print regression table: table_iv = pd.DataFrame({ 'b': round(results_iv.params, 4), 'se': round(results_iv.std_errors, 4), 't': round(results_iv.tstats, 4), 'pval': round(results_iv.pvalues, 4)
# c5: R&D intensity (RDCHEM) -- LAD benchmark, outlier screening and a test of
# the quadratic sales term.
import wooldridge
import pandas as pd
import numpy as np
import statsmodels.api as sm

df = wooldridge.dataWoo('RDCHEM')
quad_formula = 'rdintens ~ sales + np.square(sales) + profmarg'

# LAD (median regression) of the quadratic specification:
lad = sm.formula.quantreg(quad_formula, data=df).fit(q=0.5)

# OLS with and without the squared sales term:
ols1 = sm.formula.ols(quad_formula, data=df).fit()
ols2 = sm.formula.ols('rdintens ~ sales + profmarg', data=df).fit()

# Studentized residuals: outlier_test() is only provided by OLS results, not
# by quantile-regression results, so it is taken from the OLS fit of the same
# specification. The absolute value is compared with 1.96 so that large
# negative residuals are flagged as well.
student_resid = ols1.outlier_test()
outliers = student_resid[student_resid['student_resid'].abs() > 1.96].index
print(f'outliers: \n{outliers}\n')

# ols2 is nested in ols1 (it only drops the squared term). The Cox test is
# meant for non-nested models, so the two fits are compared with an F test.
# NOTE(review): confirm that a nested-model F test is what the exercise wants.
fstat, fpval, df_diff = ols1.compare_f_test(ols2)
print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
# Housing investment and prices (hseinv): regression with and without a
# linear time trend.
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf


def coef_table(res):
    # regression table (coefficients, standard errors, t statistics and
    # p values), everything rounded to four digits:
    columns = {'b': res.params, 'se': res.bse,
               't': res.tvalues, 'pval': res.pvalues}
    return pd.DataFrame({name: round(col, 4) for name, col in columns.items()})


hseinv = woo.dataWoo('hseinv')

# linear regression without time trend:
reg_wot = smf.ols(formula='np.log(invpc) ~ np.log(price)', data=hseinv)
results_wot = reg_wot.fit()

# print regression table:
table_wot = coef_table(results_wot)
print(f'table_wot: \n{table_wot}\n')

# linear regression with time trend (data set includes a time variable t):
reg_wt = smf.ols(formula='np.log(invpc) ~ np.log(price) + t', data=hseinv)
results_wt = reg_wt.fit()

# print regression table:
table_wt = coef_table(results_wt)
import wooldridge as woo import pandas as pd import numpy as np import statsmodels.formula.api as smf # define a function for the standardization: def scale(x): x_mean = np.mean(x) x_var = np.var(x, ddof=1) x_scaled = (x - x_mean) / np.sqrt(x_var) return x_scaled # standardize and estimate: hprice2 = woo.dataWoo('hprice2') hprice2['price_sc'] = scale(hprice2['price']) hprice2['nox_sc'] = scale(hprice2['nox']) hprice2['crime_sc'] = scale(hprice2['crime']) hprice2['rooms_sc'] = scale(hprice2['rooms']) hprice2['dist_sc'] = scale(hprice2['dist']) hprice2['stratio_sc'] = scale(hprice2['stratio']) reg = smf.ols( formula= 'price_sc ~ 0 + nox_sc + crime_sc + rooms_sc + dist_sc + stratio_sc', data=hprice2) results = reg.fit() # print regression table: table = pd.DataFrame({
# Kernel density estimate of the return on equity (ceosal1), plotted alone and
# on top of a histogram.
import wooldridge as woo
import statsmodels.api as sm
import matplotlib.pyplot as plt


def draw_density(estimate):
    # density curve plus the axis labels shared by both subfigures:
    plt.plot(estimate.support, estimate.density, color='black', linewidth=2)
    plt.ylabel('density')
    plt.xlabel('roe')


ceosal1 = woo.dataWoo('ceosal1')

# extract roe:
roe = ceosal1['roe']

# estimate kernel density:
kde = sm.nonparametric.KDEUnivariate(roe)
kde.fit()

# subfigure a (kernel density):
draw_density(kde)
plt.savefig('PyGraphs/Density1.pdf')
plt.close()

# subfigure b (kernel density with overlayed histogram):
plt.hist(roe, color='grey', density=True)
draw_density(kde)
plt.savefig('PyGraphs/Density2.pdf')
# Participation rate in 401(k) plans regressed on match rate and plan age.
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

k401k = woo.dataWoo('401k')

# specify the model, then estimate it by OLS:
prate_formula = 'prate ~ mrate + age'
reg = smf.ols(formula=prate_formula, data=k401k)
results = reg.fit()

# full regression output:
summary_table = results.summary()
print(f'results.summary(): \n{summary_table}\n')
# Studentized residuals of the R&D intensity regression (rdchem): extreme
# values and their distribution.
import wooldridge as woo
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

rdchem = woo.dataWoo('rdchem')

# OLS regression:
reg = smf.ols(formula='rdintens ~ sales + profmarg', data=rdchem)
results = reg.fit()

# studentized residuals for all observations:
influence = results.get_influence()
studres = influence.resid_studentized_external

# display extreme values:
studres_max = np.max(studres)
studres_min = np.min(studres)
for label, extreme in (('studres_max', studres_max),
                       ('studres_min', studres_min)):
    print(f'{label}: {extreme}\n')

# histogram (and overlayed density plot):
kde = sm.nonparametric.KDEUnivariate(studres)
kde.fit()

plt.hist(studres, color='grey', density=True)
plt.plot(kde.support, kde.density, color='black', linewidth=2)
plt.ylabel('density')
plt.xlabel('studres')
plt.savefig('PyGraphs/Outliers.pdf')
# First-difference estimation of the county crime rate model (crime4).
import wooldridge as woo
import numpy as np
import linearmodels as plm

# load the data and declare the panel structure (county x year), keeping the
# index variables as regular columns as well:
panel_index = ['county', 'year']
crime4 = woo.dataWoo('crime4').set_index(panel_index, drop=False)

# estimate FD model:
fd_formula = ('np.log(crmrte) ~ year + d83 + d84 + d85 + d86 + d87 +'
              'lprbarr + lprbconv + lprbpris + lavgsen + lpolpc')
reg = plm.FirstDifferenceOLS.from_formula(formula=fd_formula, data=crime4)
results = reg.fit()
print(f'results: \n{results}\n')
import wooldridge as woo import numpy as np import pandas as pd import statsmodels.formula.api as smf import linearmodels as plm crime2 = woo.dataWoo('crime2') # create time variable dummy by converting a Boolean variable to an integer: crime2['t'] = (crime2['year'] == 87).astype(int) # False=0, True=1 # create an index in this balanced data set by combining two arrays: id_tmp = np.linspace(1, 46, num=46) crime2['id'] = np.sort(np.concatenate([id_tmp, id_tmp])) # manually calculate first differences per entity for crmrte and unem: crime2['crmrte_diff1'] = \ crime2.sort_values(['id', 'year']).groupby('id')['crmrte'].diff() crime2['unem_diff1'] = \ crime2.sort_values(['id', 'year']).groupby('id')['unem'].diff() var_selection = ['id', 't', 'crimes', 'unem', 'crmrte_diff1', 'unem_diff1'] print(f'crime2[var_selection].head(): \n{crime2[var_selection].head()}\n') # estimate FD model with statmodels on differenced data: reg_sm = smf.ols(formula='crmrte_diff1 ~ unem_diff1', data=crime2) results_sm = reg_sm.fit() # print results: table_sm = pd.DataFrame({'b': round(results_sm.params, 4), 'se': round(results_sm.bse, 4), 't': round(results_sm.tvalues, 4),
import wooldridge as woo import pandas as pd import numpy as np import statsmodels.api as sm import statsmodels.formula.api as smf barium = woo.dataWoo('barium') T = len(barium) # monthly time series starting Feb. 1978: barium.index = pd.date_range(start='1978-02', periods=T, freq='M') reg = smf.ols(formula='np.log(chnimp) ~ np.log(chempi) + np.log(gas) +' 'np.log(rtwex) + befile6 + affile6 + afdec6', data=barium) results = reg.fit() # automatic test: bg_result = sm.stats.diagnostic.acorr_breusch_godfrey(results, nlags=3) fstat_auto = bg_result[2] fpval_auto = bg_result[3] print(f'fstat_auto: {fstat_auto}\n') print(f'fpval_auto: {fpval_auto}\n') # pedestrian test: barium['resid'] = results.resid barium['resid_lag1'] = barium['resid'].shift(1) barium['resid_lag2'] = barium['resid'].shift(2) barium['resid_lag3'] = barium['resid'].shift(3) reg_manual = smf.ols(formula='resid ~ resid_lag1 + resid_lag2 + resid_lag3 +'
import wooldridge as woo import pandas as pd import statsmodels.formula.api as smf k401ksubs = woo.dataWoo('401ksubs') # subsetting data: k401ksubs_sub = k401ksubs[k401ksubs['fsize'] == 1] # WLS: wls_weight = list(1 / k401ksubs_sub['inc']) reg_wls = smf.wls(formula='nettfa ~ inc + I((age-25)**2) + male + e401k', weights=wls_weight, data=k401ksubs_sub) # non-robust (default) results: results_wls = reg_wls.fit() table_default = pd.DataFrame({ 'b': round(results_wls.params, 4), 'se': round(results_wls.bse, 4), 't': round(results_wls.tvalues, 4), 'pval': round(results_wls.pvalues, 4) }) print(f'table_default: \n{table_default}\n') # robust results (Refined White SE): results_white = reg_wls.fit(cov_type='HC3') table_white = pd.DataFrame({ 'b': round(results_white.params, 4), 'se': round(results_white.bse, 4), 't': round(results_white.tvalues, 4),
# Computer exercises c1-c3 solved with scikit-learn's LinearRegression.
import wooldridge  # was missing: every dataWoo() call below needs it
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

pd.options.display.max_columns = None


def lin_reg(X, y, df):
    """Fit a linear regression of y on X and print a one-line summary.

    Prints the coefficients, the intercept, R^2 on the estimation sample and
    the number of rows of df. Returns the fitted LinearRegression object.
    """
    reg = LinearRegression().fit(X, y)
    print('coef is {}, intercept is {}, R^2 is {}, n={}'.format(
        reg.coef_, reg.intercept_, reg.score(X, y), len(df.index)))
    return reg


# c1
df = wooldridge.dataWoo('BWGHT')
X = np.array(df['cigs']).reshape(-1, 1)
y = np.array(df['bwght']).reshape(-1, 1)
reg = lin_reg(X, y, df)
X = np.array(list(zip(df['cigs'], df['faminc'])))
reg = lin_reg(X, y, df)

# c2
df = wooldridge.dataWoo('HPRICE1')
X = np.array(list(zip(df['sqrft'], df['bdrms'])))
y = np.array(df['price'])
reg = lin_reg(X, y, df)
# predicted price for sqrft=2438 and bdrms=4 (the value used to be computed
# and then thrown away; it is printed now):
price_hat = reg.predict([[2438, 4]])
print('predicted price is {}'.format(price_hat))

# c3
df = wooldridge.dataWoo('CEOSAL2')
sales = np.log(df['sales'])
mktval = np.log(df['mktval'])
# College GPA predictions for three hypothetical students (gpa2), reported at
# two interval levels.
import wooldridge as woo
import statsmodels.formula.api as smf
import pandas as pd

gpa2 = woo.dataWoo('gpa2')

gpa_formula = 'colgpa ~ sat + hsperc + hsize + I(hsize**2)'
reg = smf.ols(formula=gpa_formula, data=gpa2)
results = reg.fit()

# define three sets of regressor variables:
new_persons = ['newPerson1', 'newPerson2', 'newPerson3']
cvalues2 = pd.DataFrame({'sat': [1200, 900, 1400],
                         'hsperc': [30, 20, 5],
                         'hsize': [5, 3, 1]},
                        index=new_persons)

# the prediction object is the same for both levels, so build it once:
prediction = results.get_prediction(cvalues2)

# point estimates and 95% confidence and prediction intervals:
colgpa_PICI_95 = prediction.summary_frame(alpha=0.05)
print(f'colgpa_PICI_95: \n{colgpa_PICI_95}\n')

# point estimates and 99% confidence and prediction intervals:
colgpa_PICI_99 = prediction.summary_frame(alpha=0.01)
print(f'colgpa_PICI_99: \n{colgpa_PICI_99}\n')
# Fertility rate and personal exemption (fertil3): regressions in first
# differences.
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

fertil3 = woo.dataWoo('fertil3')
T = len(fertil3)

# define time series (years only) beginning in 1913:
years = pd.date_range(start='1913', periods=T, freq='Y').year
fertil3.index = years

# compute first differences:
for var in ('gfr', 'pe'):
    fertil3[f'{var}_diff1'] = fertil3[var].diff()
first_rows = fertil3.head()
print(f'fertil3.head(): \n{first_rows}\n')

# linear regression of model with first differences:
reg1 = smf.ols(formula='gfr_diff1 ~ pe_diff1', data=fertil3)
results1 = reg1.fit()

# print regression table:
stats_by_column = {'b': results1.params, 'se': results1.bse,
                   't': results1.tvalues, 'pval': results1.pvalues}
table1 = pd.DataFrame({name: round(col, 4)
                       for name, col in stats_by_column.items()})
print(f'table1: \n{table1}\n')

# linear regression of model with lagged differences:
for lag in (1, 2):
    fertil3[f'pe_diff1_lag{lag}'] = fertil3['pe_diff1'].shift(lag)
import wooldridge as woo import numpy as np import patsy as pt import scipy.stats as stats import statsmodels.formula.api as smf import statsmodels.base.model as smclass recid = woo.dataWoo('recid') # define dummy for censored observations: censored = recid['cens'] != 0 y, X = pt.dmatrices( 'ldurat ~ workprg + priors + tserved + felon +' 'alcohol + drugs + black + married + educ + age', data=recid, return_type='dataframe') # generate starting solution: reg_ols = smf.ols(formula='ldurat ~ workprg + priors + tserved + felon +' 'alcohol + drugs + black + married + educ + age', data=recid) results_ols = reg_ols.fit() sigma_start = np.log(sum(results_ols.resid**2) / len(results_ols.resid)) params_start = np.concatenate((np.array(results_ols.params), sigma_start), axis=None) # extend statsmodels class by defining nloglikeobs: class CensReg(smclass.GenericLikelihoodModel): def __init__(self, endog, cens, exog): self.cens = cens
import wooldridge as woo import numpy as np import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf import patsy as pt hprice1 = woo.dataWoo('hprice1') # estimate model: reg = smf.ols( formula='np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms', data=hprice1) results = reg.fit() # BP test: y, X_bp = pt.dmatrices( 'np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms', data=hprice1, return_type='dataframe') result_bp = sm.stats.diagnostic.het_breuschpagan(results.resid, X_bp) bp_statistic = result_bp[0] bp_pval = result_bp[1] print(f'bp_statistic: {bp_statistic}\n') print(f'bp_pval: {bp_pval}\n') # White test: X_wh = pd.DataFrame({ 'const': 1, 'fitted_reg': results.fittedvalues, 'fitted_reg_sq': results.fittedvalues**2
# Joint significance of the race dummies in the GPA equation (gpa3, spring
# term only) under different variance-covariance estimators.
import wooldridge as woo
import statsmodels.formula.api as smf


def joint_f_test(res, restrictions):
    # F statistic and p value for the joint restrictions on a fitted model:
    ftest = res.f_test(restrictions)
    return ftest.statistic[0][0], ftest.pvalue


gpa3 = woo.dataWoo('gpa3')

# definition of model and hypotheses:
spring_only = (gpa3['spring'] == 1)
reg = smf.ols(
    formula='cumgpa ~ sat + hsperc + tothrs + female + black + white',
    data=gpa3, subset=spring_only)
hypotheses = ['black = 0', 'white = 0']

# F-Tests using different variance-covariance formulas:
# usual VCOV:
results_default = reg.fit()
fstat_default, fpval_default = joint_f_test(results_default, hypotheses)
print(f'fstat_default: {fstat_default}\n')
print(f'fpval_default: {fpval_default}\n')

# refined White VCOV:
results_hc3 = reg.fit(cov_type='HC3')
fstat_hc3, fpval_hc3 = joint_f_test(results_hc3, hypotheses)
print(f'fstat_hc3: {fstat_hc3}\n')
print(f'fpval_hc3: {fpval_hc3}\n')

# classical White VCOV:
results_hc0 = reg.fit(cov_type='HC0')
# Correlated random effects (CRE) wage equation on the wagepan panel.
import wooldridge as woo
import linearmodels as plm

wagepan = woo.dataWoo('wagepan')
wagepan['t'] = wagepan['year']
wagepan['entity'] = wagepan['nr']
wagepan = wagepan.set_index(['nr'])

# include group specific means (computed once per person, then broadcast to
# all of that person's rows through the 'nr' index):
person_means = wagepan.groupby('nr').mean()
for var in ('married', 'union'):
    wagepan[f'{var}_b'] = person_means[var]
wagepan = wagepan.set_index(['year'], append=True)

# estimate CRE:
cre_formula = 'lwage ~ married + union + C(t)*educ + married_b + union_b'
reg_cre = plm.RandomEffects.from_formula(formula=cre_formula, data=wagepan)
results_cre = reg_cre.fit()

# RE test as a Wald test on the CRE specific coefficients:
wtest = results_cre.wald_test(formula='married_b = union_b = 0')
print(f'wtest: \n{wtest}\n')
# Missing values in the LSAT score (lawsch85): how numpy, pandas and
# statsmodels deal with them.
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

lawsch85 = woo.dataWoo('lawsch85')

# missings in numpy:
x_np = np.array(lawsch85['LSAT'])
x_np_bar1 = np.mean(x_np)
x_np_bar2 = np.nanmean(x_np)

# missings in pandas:
x_pd = lawsch85['LSAT']
x_pd_bar1 = np.mean(x_pd)
x_pd_bar2 = np.nanmean(x_pd)

# report the four averages in one pass:
averages = (('x_np_bar1', x_np_bar1), ('x_np_bar2', x_np_bar2),
            ('x_pd_bar1', x_pd_bar1), ('x_pd_bar2', x_pd_bar2))
for label, average in averages:
    print(f'{label}: {average}\n')

# observations and variables:
dimensions = lawsch85.shape
print(f'lawsch85.shape: {dimensions}\n')

# regression (missings are taken care of by default):
reg = smf.ols(formula='np.log(salary) ~ LSAT + cost + age', data=lawsch85)
results = reg.fit()
n_used = results.nobs
print(f'results.nobs: {n_used}\n')
# Wage equation on the pooled 1978/1985 CPS samples with year interactions.
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

cps78_85 = woo.dataWoo('cps78_85')

# OLS results including interaction terms:
wage_formula = ('lwage ~ y85*(educ+female) + exper +'
                'I((exper**2)/100) + union')
reg = smf.ols(formula=wage_formula, data=cps78_85)
results = reg.fit()

# print regression table:
stats_by_column = {'b': results.params, 'se': results.bse,
                   't': results.tvalues, 'pval': results.pvalues}
table = pd.DataFrame({name: round(col, 4)
                      for name, col in stats_by_column.items()})
print(f'table: \n{table}\n')
# Normal-approximation confidence intervals for the mean of y (audit data).
import wooldridge as woo
import numpy as np
import scipy.stats as stats

audit = woo.dataWoo('audit')
y = audit['y']

# ingredients to CI formula:
avgy = np.mean(y)
n = len(y)
sdy = np.std(y, ddof=1)
se = sdy / np.sqrt(n)
c95 = stats.norm.ppf(0.975)
c99 = stats.norm.ppf(0.995)

# half-widths of the two intervals:
margin95 = c95 * se
margin99 = c99 * se

# 95% confidence interval:
lowerCI95 = avgy - margin95
print(f'lowerCI95: {lowerCI95}\n')
upperCI95 = avgy + margin95
print(f'upperCI95: {upperCI95}\n')

# 99% confidence interval:
lowerCI99 = avgy - margin99
print(f'lowerCI99: {lowerCI99}\n')
upperCI99 = avgy + margin99
print(f'upperCI99: {upperCI99}\n')
# Vote share of candidate A against the campaign spending share (vote1):
# simple regression and scatter plot with the fitted line.
import wooldridge as woo
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

vote1 = woo.dataWoo('vote1')

# OLS regression:
reg = smf.ols(formula='voteA ~ shareA', data=vote1)
results = reg.fit()
b = results.params
print(f'b: \n{b}\n')

# scatter plot and fitted values:
fitted = results.fittedvalues
plt.plot('shareA', 'voteA', data=vote1,
         color='grey', marker='o', linestyle='')
plt.plot(vote1['shareA'], fitted, color='black', linestyle='-')
plt.ylabel('voteA')
plt.xlabel('shareA')
plt.savefig('PyGraphs/Example-2-5.pdf')
# Labor force participation (mroz): linear probability, logit and probit
# models with manually computed average partial effects.
import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import scipy.stats as stats


def manual_ape(res, pdf):
    # coefficients scaled by the sample average of the density evaluated at
    # the model's fitted values:
    factor = np.mean(pdf(res.fittedvalues))
    return res.params * factor


mroz = woo.dataWoo('mroz')

# estimate models (all three share one specification):
inlf_formula = ('inlf ~ nwifeinc + educ + exper + I(exper**2) +'
                'age + kidslt6 + kidsge6')

reg_lin = smf.ols(formula=inlf_formula, data=mroz)
results_lin = reg_lin.fit(cov_type='HC3')

reg_logit = smf.logit(formula=inlf_formula, data=mroz)
results_logit = reg_logit.fit(disp=0)

reg_probit = smf.probit(formula=inlf_formula, data=mroz)
results_probit = reg_probit.fit(disp=0)

# manual average partial effects:
APE_lin = np.array(results_lin.params)
APE_logit_manual = manual_ape(results_logit, stats.logistic.pdf)
APE_probit_manual = manual_ape(results_probit, stats.norm.pdf)
# Cigarette demand (smoke): OLS estimates and Breusch-Pagan test.
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy as pt

smoke = woo.dataWoo('smoke')

# OLS:
reg_ols = smf.ols(formula='cigs ~ np.log(income) + np.log(cigpric) +'
                          'educ + age + I(age**2) + restaurn',
                  data=smoke)
results_ols = reg_ols.fit()
stats_by_column = {'b': results_ols.params, 'se': results_ols.bse,
                   't': results_ols.tvalues, 'pval': results_ols.pvalues}
table_ols = pd.DataFrame({name: round(col, 4)
                          for name, col in stats_by_column.items()})
print(f'table_ols: \n{table_ols}\n')

# BP test (needs the regressor matrix of the model as a data frame):
y, X = pt.dmatrices(
    'cigs ~ np.log(income) + np.log(cigpric) + educ +'
    'age + I(age**2) + restaurn',
    data=smoke, return_type='dataframe')
result_bp = sm.stats.diagnostic.het_breuschpagan(results_ols.resid, X)
bp_statistic, bp_pval = result_bp[0], result_bp[1]
import wooldridge as woo import pandas as pd import statsmodels.formula.api as smf nyse = woo.dataWoo('nyse') nyse['ret'] = nyse['return'] # add all lags up to order 3: nyse['ret_lag1'] = nyse['ret'].shift(1) nyse['ret_lag2'] = nyse['ret'].shift(2) nyse['ret_lag3'] = nyse['ret'].shift(3) # linear regression of model with lags: reg1 = smf.ols(formula='ret ~ ret_lag1', data=nyse) reg2 = smf.ols(formula='ret ~ ret_lag1 + ret_lag2', data=nyse) reg3 = smf.ols(formula='ret ~ ret_lag1 + ret_lag2 + ret_lag3', data=nyse) results1 = reg1.fit() results2 = reg2.fit() results3 = reg3.fit() # print regression tables: table1 = pd.DataFrame({ 'b': round(results1.params, 4), 'se': round(results1.bse, 4), 't': round(results1.tvalues, 4), 'pval': round(results1.pvalues, 4) }) print(f'table1: \n{table1}\n') table2 = pd.DataFrame({ 'b': round(results2.params, 4),
# Number of arrests in 1986 (crime1) explained by conviction rate, prison time
# and employment.
import wooldridge as woo
import statsmodels.formula.api as smf

crime1 = woo.dataWoo('crime1')

# model without avgsen:
arrest_formula = 'narr86 ~ pcnv + ptime86 + qemp86'
reg = smf.ols(formula=arrest_formula, data=crime1)
results = reg.fit()

# full regression output:
summary_table = results.summary()
print(f'results.summary(): \n{summary_table}\n')
# Baseball salaries (mlb1): F test for the joint significance of the three
# performance variables, built from the two R-squared values.
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
import scipy.stats as stats

mlb1 = woo.dataWoo('mlb1')
n = mlb1.shape[0]

# unrestricted OLS regression:
formula_ur = 'np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr'
reg_ur = smf.ols(formula=formula_ur, data=mlb1)
fit_ur = reg_ur.fit()
r2_ur = fit_ur.rsquared
print(f'r2_ur: {r2_ur}\n')

# restricted OLS regression:
formula_r = 'np.log(salary) ~ years + gamesyr'
reg_r = smf.ols(formula=formula_r, data=mlb1)
fit_r = reg_r.fit()
r2_r = fit_r.rsquared
print(f'r2_r: {r2_r}\n')

# F statistic:
r2_gain = r2_ur - r2_r
unexplained_ur = 1 - r2_ur
fstat = r2_gain / unexplained_ur * (n - 6) / 3
print(f'fstat: {fstat}\n')

# CV for alpha=1% using the F distribution with 3 and 347 d.f.:
alpha = 0.01
cv = stats.f.ppf(1 - alpha, 3, 347)
print(f'cv: {cv}\n')

# p value = 1-cdf of the appropriate F distribution:
# Computer exercises c4 (White test on VOTE1) and c13 (robust regression on
# FERTIL2).
import statsmodels.api as sm
import numpy as np
import wooldridge
import pandas as pd

# c4
df = wooldridge.dataWoo('VOTE1')
df.dropna(inplace=True)
# .copy() gives X its own data, so the log transforms below are ordinary
# column assignments rather than writes into a slice of df.
X = df[['prtystrA', 'democA', 'expendA', 'expendB']].copy()
X['expendA'] = np.log(X['expendA'])
X['expendB'] = np.log(X['expendB'])
X = sm.add_constant(X)
y = df['voteA']
model = sm.OLS(y, X, missing='drop').fit()
# keep and show the test result instead of discarding it:
white_result = sm.stats.diagnostic.het_white(model.resid, X)
print(f'white_result: {white_result}\n')

# c13
df = wooldridge.dataWoo('FERTIL2')
df.dropna(inplace=True)
# In a formula, 'age^2' is handed to Python as 'age ^ 2', which is a bitwise
# XOR and not a square. I(age**2) produces the intended quadratic term.
model = sm.formula.rlm(
    'children ~ age + I(age**2) + educ + electric + urban', data=df).fit()