# author: Thomas Haslwanter, date: Sept-2015
#
# Fits an ordinary-least-squares regression of 'AvgTmp' on 'year', prints the
# fit summary, and plots the regression line with its confidence band.

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import statsmodels.formula.api as sm

# We don't need to reinvent the wheel ;)
from S11_correlation import getModelData

if __name__ == '__main__':

    # Get the data (show=False is forwarded to the helper as-is)
    data = getModelData(show=False)

    # Regression --------------------------------------------------------
    # Use the formula approach from statsmodels; the offset (intercept) is
    # automatically included in the model.
    # NOTE: older pandas versions offered `pd.ols` for this; that helper is
    # no longer part of pandas, so statsmodels is the way to go.
    model = sm.ols('AvgTmp ~ year', data)
    results = model.fit()
    print(results.summary())

    # Visually, the confidence intervals can be shown using seaborn.
    # The arguments are passed by keyword: recent seaborn releases accept
    # only `data` positionally, so the old positional call
    # `sns.lmplot('year', 'AvgTmp', data)` raises a TypeError there.
    sns.lmplot(x='year', y='AvgTmp', data=data)
    plt.show()
'''Solution for Exercise "Normality Check" in Chapter 11 ''' # author: Thomas Haslwanter, date: Sept-2015 from scipy import stats import matplotlib.pyplot as plt import statsmodels.formula.api as sm import seaborn as sns # We don't need to invent the wheel twice ;) from S11_correlation import getModelData if __name__== '__main__': # get the data data = getModelData(show=False) # Fit the model model = sm.ols('AvgTmp ~ year', data) results = model.fit() # Normality check ---------------------------------------------------- res_data = results.resid # Get the values for the residuals # QQ-plot, for a visual check stats.probplot(res_data, plot=plt) plt.show() # Normality test, for a quantitative check: _, pVal = stats.normaltest(res_data) if pVal < 0.05: