def test_acorr_breush_godfrey(self):
    # Compare acorr_breush_godfrey on the fixture in self.res with stored
    # reference values, then check the default lag choice.
    fitted = self.res

    # Reference output recorded from: bgtest(fm, order = 4, type="F")
    ref_f = {
        'statistic': 1.179280833676792,
        'pvalue': 0.321197487261203,
        'parameters': (4, 195),
        'distr': 'f',
    }
    # Reference output recorded from: bgtest(fm, order = 4, type="Chisq")
    # (the original log line read: mkhtest(bgc, "breushpagan_c", "chi2"))
    ref_chi2 = {
        'statistic': 4.771042651230007,
        'pvalue': 0.3116067133066697,
        'parameters': (4,),
        'distr': 'chi2',
    }

    observed = smsdia.acorr_breush_godfrey(fitted, nlags=4)
    # Expected order: chi2 statistic, chi2 p-value, F statistic, F p-value.
    expected = [
        ref[key]
        for ref in (ref_chi2, ref_f)
        for key in ('statistic', 'pvalue')
    ]
    assert_almost_equal(observed, expected, decimal=13)

    # Lag choice: nlags=None has to reproduce the explicit nlags=14 result.
    with_default_lags = smsdia.acorr_breush_godfrey(fitted, nlags=None)
    with_explicit_lags = smsdia.acorr_breush_godfrey(fitted, nlags=14)
    assert_almost_equal(with_default_lags, with_explicit_lags, decimal=13)
def test_acorr_breush_godfrey(self):
    # Breusch-Godfrey check: stored reference numbers first, then the
    # automatic lag selection.
    model_result = self.res

    # F form, recorded from: bgtest(fm, order = 4, type="F")
    f_reference = {
        "statistic": 1.179280833676792,
        "pvalue": 0.321197487261203,
        "parameters": (4, 195),
        "distr": "f",
    }
    # Chi-square form, recorded from: bgtest(fm, order = 4, type="Chisq")
    # (original log line: mkhtest(bgc, "breushpagan_c", "chi2"))
    chi2_reference = {
        "statistic": 4.771042651230007,
        "pvalue": 0.3116067133066697,
        "parameters": (4,),
        "distr": "chi2",
    }

    # The function output is compared as
    # [chi2 statistic, chi2 p-value, F statistic, F p-value].
    wanted = []
    for reference in (chi2_reference, f_reference):
        wanted.append(reference["statistic"])
        wanted.append(reference["pvalue"])

    got = smsdia.acorr_breush_godfrey(model_result, nlags=4)
    assert_almost_equal(got, wanted, decimal=13)

    # nlags=None and nlags=14 must agree for this fixture.
    auto_lag = smsdia.acorr_breush_godfrey(model_result, nlags=None)
    fixed_lag = smsdia.acorr_breush_godfrey(model_result, nlags=14)
    assert_almost_equal(auto_lag, fixed_lag, decimal=13)
def test_acorr_breush_godfrey(self):
    # Compare acorr_breush_godfrey (nlags=4) on self.res with stored
    # reference values.
    fit = self.res

    # Recorded from: bgtest(fm, order = 4, type="F")
    expected_f = {
        'statistic': 1.179280833676792,
        'pvalue': 0.321197487261203,
        'parameters': (4, 195),
        'distr': 'f',
    }
    # Recorded from: bgtest(fm, order = 4, type="Chisq")
    # (original log line: mkhtest(bgc, "breushpagan_c", "chi2"))
    expected_chi2 = {
        'statistic': 4.771042651230007,
        'pvalue': 0.3116067133066697,
        'parameters': (4,),
        'distr': 'chi2',
    }

    actual = smsdia.acorr_breush_godfrey(fit, nlags=4)

    # Order under comparison: chi2 statistic, chi2 p-value, F statistic,
    # F p-value.
    chi2_pair = [expected_chi2['statistic'], expected_chi2['pvalue']]
    f_pair = [expected_f['statistic'], expected_f['pvalue']]
    assert_almost_equal(actual, chi2_pair + f_pair, decimal=13)
def _ols_checks_find_datasets(good_names):
    """Return ``[name, kind, line_count]`` for each accepted dataset under ``mydir + 'data/'``."""
    datasets = []
    for kind in ['micro', 'macro']:
        for name in os.listdir(mydir + 'data/' + kind):
            if name not in good_names:
                continue
            path = mydir + 'data/' + kind + '/' + name + '/' + name + '-SADMetricData.txt'
            # The context manager closes the handle; the original left one
            # open file object behind per dataset on every iteration.
            with open(path) as handle:
                num_lines = sum(1 for _ in handle)
            datasets.append([name, kind, num_lines])
    return datasets


def _ols_checks_draw_sample(datasets, samp_size):
    """Draw ``samp_size`` random rows (with replacement) per dataset and return the regression columns."""
    raw_rows = []
    for name, kind, numlines in datasets:
        path = mydir + 'data/' + kind + '/' + name + '/' + name + '-SADMetricData.txt'
        picks = np.random.choice(range(1, numlines + 1), samp_size, replace=True)
        for line in picks:
            raw_rows.append(linecache.getline(path, line))

    sample = {'N': [], 'Kind': [], 'Rarity': [], 'Richness': [],
              'Dominance': [], 'Evenness': []}
    for raw in raw_rows:
        fields = raw.split()
        if not fields:
            print('no data')
            continue

        # Every row must have exactly these 26 whitespace-separated columns;
        # a different count raises ValueError, as in the original.
        (name, kind, N, S, Var, Evar, ESimp, EQ, O, ENee, EPielou, EHeip,
         BP, SimpDom, Nmax, McN, skew, logskew, chao1, ace, jknife1,
         jknife2, margalef, menhinick, preston_a, preston_S) = fields

        N = float(N)
        S = float(S)
        sample['N'].append(float(np.log(N)))
        sample['Kind'].append(kind)
        sample['Richness'].append(float(np.log(S)))
        sample['Evenness'].append(float(np.log(float(ESimp))))
        sample['Dominance'].append(float(np.log(float(BP) * float(N))))

        # Log-modulo transformation of skewness: sign(skew) * log(|skew| + 1).
        skew = float(skew)
        sign = -1 if skew < 0 else 1
        sample['Rarity'].append(float(np.log(np.abs(skew) + 1) * sign))
    return sample


def _ols_checks_model_stats(results):
    """Collect fit statistics, coefficients and assumption-test p-values for one fitted OLS model."""
    resid = results.resid
    exog = results.model.exog
    params = results.params.tolist()
    pvals = results.pvalues.tolist()

    stats = {'R2': results.rsquared, 'pF': results.f_pvalue}

    # Positions 0..3 are read as macro intercept, micro intercept term,
    # macro slope and micro slope term.
    # NOTE(review): this relies on the term order produced for
    # 'y ~ N * Kind' (presumably Intercept, Kind[T.micro], N,
    # N:Kind[T.micro]) -- confirm against results.params.index.
    # NOTE(review): if that order holds, the "Mic" entries are offsets from
    # the macro terms rather than absolute microbe coefficients, yet the
    # figure labels them 'Microbe'. The original also had if/else branches
    # on p > 0.05 that appended the same value on both sides, which suggests
    # an unfinished adjustment -- confirm the intended quantity.
    terms = ('MacIntercept', 'MicIntercept', 'MacSlope', 'MicSlope')
    for position, term in enumerate(terms):
        stats[term + '_pVals'] = pvals[position]
        stats[term + '_Coeffs'] = params[position]

    # Linearity: rainbow test and Lagrange-multiplier test.
    stats['pLinRainB'] = smd.linear_rainbow(results)[1]
    stats['pLinLM'] = smd.linear_lm(resid, exog)[1]

    # Independence of observations (no serial correlation in residuals).
    # Returned tuple: LM statistic, LM p-value, F statistic, F p-value.
    # NOTE(review): newer statsmodels releases spell this and
    # het_breushpagan with "breusch"; the names are kept as the file uses
    # them -- confirm against the installed version.
    bg = smd.acorr_breush_godfrey(results, nlags=None, store=False)
    stats['pCorrBG'] = bg[1]
    stats['pCorrF'] = bg[3]

    # Homoscedasticity: index 3 is the p-value of the F statistic.
    stats['pHomoHW'] = sms.het_white(resid, exog)[3]
    stats['pHomoHB'] = sms.het_breushpagan(resid, exog)[3]

    # Normality of residuals: omnibus, Jarque-Bera, Lilliefors (KS with
    # estimated mean and variance) and Anderson-Darling.
    stats['pNormOmni'] = sms.omni_normtest(resid)[1]
    stats['pNormJB'] = sms.jarque_bera(resid)[1]
    stats['pNormKS'] = smd.kstest_normal(resid)[1]
    stats['pNormAD'] = smd.normal_ad(resid)[1]
    return stats


def _ols_checks_plot_row(fig, row, title, title_y, stats, NLIST, xlimits,
                         coef_ylim, pval_ylim, with_xlabel):
    """Draw one figure row: R^2, slope coefficients and assumption-test p-values vs. sample size."""
    first_panel = 3 * row + 1

    # Panel 1: model R^2 vs. sample size.
    fig.add_subplot(4, 3, first_panel)
    plt.xlim(*xlimits)
    plt.ylim(0, 1)
    plt.xscale('log')
    plt.plot(NLIST, stats['R2'], c='0.2', ls='--', lw=2, label=r'$R^2$')
    plt.ylabel(r'$R^2$', fontsize=14)
    if with_xlabel:
        plt.xlabel('Sample size', fontsize=14)
    plt.text(1.01, title_y, title, rotation='vertical', fontsize=16)
    leg = plt.legend(loc=4, prop={'size': 14})
    leg.draw_frame(False)

    # Panel 2: slope coefficients vs. sample size.
    fig.add_subplot(4, 3, first_panel + 1)
    plt.xlim(*xlimits)
    plt.ylim(*coef_ylim)
    plt.xscale('log')
    plt.plot(NLIST, stats['MicSlope_Coeffs'], c='r', lw=2, label='Microbe')
    plt.plot(NLIST, stats['MacSlope_Coeffs'], c='b', lw=2, label='Macrobe')
    plt.ylabel('Coefficient')
    if with_xlabel:
        plt.xlabel('Sample size', fontsize=14)
    leg = plt.legend(loc=10, prop={'size': 8})
    leg.draw_frame(False)

    # Panel 3: mean p-values of the assumption tests vs. sample size.
    fig.add_subplot(4, 3, first_panel + 2)
    plt.xlim(*xlimits)
    if pval_ylim is not None:
        plt.ylim(*pval_ylim)
    plt.xscale('log')
    plt.plot(NLIST, stats['pLinLM'], c='m', ls='-', label='linearity')
    plt.plot(NLIST, stats['pCorrF'], c='c', ls='-', label='autocorrelation')
    plt.plot(NLIST, stats['pHomoHW'], c='orange', ls='-', label='homoscedasticity')
    plt.plot(NLIST, stats['pNormOmni'], c='Lime', ls='-', label='normality')
    # Reference line at p = 0.05.
    plt.plot([1, 100], [0.05, 0.05], c='0.2', ls='--')
    plt.ylabel('p-value')
    if with_xlabel:
        plt.xlabel('Sample size', fontsize=14)
    leg = plt.legend(loc=1, prop={'size': 8})
    leg.draw_frame(False)


def Fig_OLS_Checks():
    """Plot how OLS fits and assumption-test p-values change with sample size.

    For each sample size, rows are repeatedly drawn at random (with
    replacement) from every accepted dataset under ``mydir + 'data/'``.
    Four models of the form ``response ~ N * Kind`` are fitted (Rarity,
    Richness, Dominance, Evenness). The R^2, F-test p-value, coefficients,
    coefficient p-values and the p-values of several linearity,
    autocorrelation, homoscedasticity and normality tests are averaged over
    the iterations.

    The 4 x 3 figure (rows: Rarity, Dominance, Evenness, Richness; columns:
    R^2, slope coefficients, assumption p-values) is written to
    ``mydir + 'figs/appendix/SampleSize/SampleSizeEffects.png'``.

    Changes from the previous version:
    * The Richness macro-intercept statistics are taken from the Richness
      model (they were copied from the Rarity model).
    * The Richness p-value panel plots the Richness autocorrelation
      p-values (it plotted the Evenness ones).
    * Dataset discovery runs once instead of once per iteration, and the
      files used for line counting are closed.

    Returns:
        None. The figure file is the only output, apart from progress
        messages on stdout.
    """
    SampSizes = [5, 6, 7, 8, 9, 10, 13, 16, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    Iterations = 100

    GoodNames = ['EMPclosed', 'HMP', 'BIGN', 'TARA', 'BOVINE', 'HUMAN',
                 'LAUB', 'SED', 'CHU', 'CHINA', 'CATLIN', 'FUNGI', 'HYDRO',
                 'BBS', 'CBC', 'MCDB', 'GENTRY', 'FIA']  # all microbe data is MGRAST

    # (key used for bookkeeping, response column used in the formula)
    models = [('Rare', 'Rarity'), ('Rich', 'Richness'),
              ('Dom', 'Dominance'), ('Even', 'Evenness')]

    fig = plt.figure(figsize=(12, 8))

    # The dataset list does not depend on the sample size or iteration.
    datasets = _ols_checks_find_datasets(GoodNames)

    NLIST = []
    # summary[key][stat] -> list with one mean value per sample size.
    summary = dict((key, {}) for key, _ in models)

    for SampSize in SampSizes:
        # per_iteration[key][stat] -> list with one value per iteration.
        per_iteration = dict((key, {}) for key, _ in models)

        for iteration in range(Iterations):
            sample = _ols_checks_draw_sample(datasets, SampSize)

            for key, response in models:
                d = pd.DataFrame({'N': sample['N']})
                d[response] = sample[response]
                d['Kind'] = sample['Kind']
                # Dummy-variable regression with an N x Kind interaction.
                results = smf.ols(response + ' ~ N * Kind', d).fit()

                for stat, value in _ols_checks_model_stats(results).items():
                    per_iteration[key].setdefault(stat, []).append(value)

            # Same text as the old Python 2 print statement, valid on 2 and 3.
            print('Sample size: %s iteration: %s' % (SampSize, iteration))

        NLIST.append(SampSize)
        for key, _ in models:
            for stat, values in per_iteration[key].items():
                summary[key].setdefault(stat, []).append(np.mean(values))

    xlimits = (min(SampSizes) - 1, max(SampSizes) + 10)

    # (key, row label, label y-position, coefficient ylim, p-value ylim)
    # The Richness p-value panel keeps matplotlib's automatic y-limits, as
    # before.
    rows = [
        ('Rare', 'Rarity', 0.6, (0.0, 0.16), (0.0, 0.6)),
        ('Dom', 'Dominance', 0.82, (-0.2, 1.2), (0, 0.6)),
        ('Even', 'Evenness', 0.7, (-0.25, 0.0), (0.0, 0.3)),
        ('Rich', 'Richness', 0.82, (-0.2, 1.2), None),
    ]
    for row, (key, title, title_y, coef_ylim, pval_ylim) in enumerate(rows):
        _ols_checks_plot_row(fig, row, title, title_y, summary[key], NLIST,
                             xlimits, coef_ylim, pval_ylim,
                             with_xlabel=(row == len(rows) - 1))

    plt.subplots_adjust(wspace=0.4, hspace=0.4)
    plt.savefig(mydir + 'figs/appendix/SampleSize/SampleSizeEffects.png',
                dpi=600, bbox_inches="tight")
def Fig_OLS_Checks(): #fs = 10 # font size used across figures #color = str() #OrC = 'open' SampSizes = [ 5, 6, 7, 8, 9, 10, 13, 16, 20, 30, 40, 50, 60, 70, 80, 90, 100 ] Iterations = 100 fig = plt.figure(figsize=(12, 8)) # MODEL PARAMETERS Rare_MacIntercept_pVals = [] # List to hold coefficient p-values Rare_MacIntercept_Coeffs = [] # List to hold coefficients Rich_MacIntercept_pVals = [] Rich_MacIntercept_Coeffs = [] Dom_MacIntercept_pVals = [] Dom_MacIntercept_Coeffs = [] Even_MacIntercept_pVals = [] Even_MacIntercept_Coeffs = [] Rare_MicIntercept_pVals = [] Rare_MicIntercept_Coeffs = [] Rich_MicIntercept_pVals = [] Rich_MicIntercept_Coeffs = [] Dom_MicIntercept_pVals = [] Dom_MicIntercept_Coeffs = [] Even_MicIntercept_pVals = [] Even_MicIntercept_Coeffs = [] Rare_MacSlope_pVals = [] Rare_MacSlope_Coeffs = [] Rich_MacSlope_pVals = [] Rich_MacSlope_Coeffs = [] Dom_MacSlope_pVals = [] Dom_MacSlope_Coeffs = [] Even_MacSlope_pVals = [] Even_MacSlope_Coeffs = [] Rare_MicSlope_pVals = [] Rare_MicSlope_Coeffs = [] Rich_MicSlope_pVals = [] Rich_MicSlope_Coeffs = [] Dom_MicSlope_pVals = [] Dom_MicSlope_Coeffs = [] Even_MicSlope_pVals = [] Even_MicSlope_Coeffs = [] RareR2List = [] # List to hold model R2 RarepFList = [] # List to hold significance of model R2 RichR2List = [] # List to hold model R2 RichpFList = [] # List to hold significance of model R2 DomR2List = [] # List to hold model R2 DompFList = [] # List to hold significance of model R2 EvenR2List = [] # List to hold model R2 EvenpFList = [] # List to hold significance of model R2 # ASSUMPTIONS OF LINEAR REGRESSION # 1. Error in predictor variables is negligible...presumably yes # 2. Variables are measured at the continuous level...yes # 3. 
The relationship is linear #RarepLinListHC = [] RarepLinListRainB = [] RarepLinListLM = [] #RichpLinListHC = [] RichpLinListRainB = [] RichpLinListLM = [] #DompLinListHC = [] DompLinListRainB = [] DompLinListLM = [] #EvenpLinListHC = [] EvenpLinListRainB = [] EvenpLinListLM = [] # 4. There are no significant outliers...need to find tests or measures # 5. Independence of observations (no serial correlation in residuals) RarepCorrListBG = [] RarepCorrListF = [] RichpCorrListBG = [] RichpCorrListF = [] DompCorrListBG = [] DompCorrListF = [] EvenpCorrListBG = [] EvenpCorrListF = [] # 6. Homoscedacticity RarepHomoHW = [] RarepHomoHB = [] RichpHomoHW = [] RichpHomoHB = [] DompHomoHW = [] DompHomoHB = [] EvenpHomoHW = [] EvenpHomoHB = [] # 7. Normally distributed residuals (errors) RarepNormListOmni = [] # Omnibus test for normality RarepNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality RarepNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance RarepNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance RichpNormListOmni = [] # Omnibus test for normality RichpNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality RichpNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance RichpNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance DompNormListOmni = [] # Omnibus test for normality DompNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality DompNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance DompNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance EvenpNormListOmni = [] # Omnibus test for normality EvenpNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality 
EvenpNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance EvenpNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance NLIST = [] for SampSize in SampSizes: sRare_MacIntercept_pVals = [] # List to hold coefficient p-values sRare_MacIntercept_Coeffs = [] # List to hold coefficients sRich_MacIntercept_pVals = [] # List to hold coefficient p-values sRich_MacIntercept_Coeffs = [] # List to hold coefficients sDom_MacIntercept_pVals = [] sDom_MacIntercept_Coeffs = [] sEven_MacIntercept_pVals = [] sEven_MacIntercept_Coeffs = [] sRare_MicIntercept_pVals = [] sRare_MicIntercept_Coeffs = [] sRich_MicIntercept_pVals = [] sRich_MicIntercept_Coeffs = [] sDom_MicIntercept_pVals = [] sDom_MicIntercept_Coeffs = [] sEven_MicIntercept_pVals = [] sEven_MicIntercept_Coeffs = [] sRare_MacSlope_pVals = [] sRare_MacSlope_Coeffs = [] sRich_MacSlope_pVals = [] sRich_MacSlope_Coeffs = [] sDom_MacSlope_pVals = [] sDom_MacSlope_Coeffs = [] sEven_MacSlope_pVals = [] sEven_MacSlope_Coeffs = [] sRare_MicSlope_pVals = [] sRare_MicSlope_Coeffs = [] sRich_MicSlope_pVals = [] sRich_MicSlope_Coeffs = [] sDom_MicSlope_pVals = [] sDom_MicSlope_Coeffs = [] sEven_MicSlope_pVals = [] sEven_MicSlope_Coeffs = [] sRareR2List = [] # List to hold model R2 sRarepFList = [] # List to hold significance of model R2 sRichR2List = [] # List to hold model R2 sRichpFList = [] # List to hold significance of model R2 sDomR2List = [] # List to hold model R2 sDompFList = [] # List to hold significance of model R2 sEvenR2List = [] # List to hold model R2 sEvenpFList = [] # List to hold significance of model R2 # ASSUMPTIONS OF LINEAR REGRESSION # 1. Error in predictor variables is negligible...presumably yes # 2. Variables are measured at the continuous level...yes # 3. 
The relationship is linear #sRarepLinListHC = [] sRarepLinListRainB = [] sRarepLinListLM = [] #sRichpLinListHC = [] sRichpLinListRainB = [] sRichpLinListLM = [] #sDompLinListHC = [] sDompLinListRainB = [] sDompLinListLM = [] #sEvenpLinListHC = [] sEvenpLinListRainB = [] sEvenpLinListLM = [] # 4. There are no significant outliers...need to find tests or measures # 5. Independence of observations (no serial correlation in residuals) sRarepCorrListBG = [] sRarepCorrListF = [] sRichpCorrListBG = [] sRichpCorrListF = [] sDompCorrListBG = [] sDompCorrListF = [] sEvenpCorrListBG = [] sEvenpCorrListF = [] # 6. Homoscedacticity sRarepHomoHW = [] sRarepHomoHB = [] sRichpHomoHW = [] sRichpHomoHB = [] sDompHomoHW = [] sDompHomoHB = [] sEvenpHomoHW = [] sEvenpHomoHB = [] # 7. Normally distributed residuals (errors) sRarepNormListOmni = [] # Omnibus test for normality sRarepNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality sRarepNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance sRarepNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance sRichpNormListOmni = [] # Omnibus test for normality sRichpNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality sRichpNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance sRichpNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance sDompNormListOmni = [] # Omnibus test for normality sDompNormListJB = [ ] # Calculate residual skewness, kurtosis, and do the JB test for normality sDompNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance sDompNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance sEvenpNormListOmni = [] # Omnibus test for normality sEvenpNormListJB = [ ] # Calculate residual skewness, 
kurtosis, and do the JB test for normality sEvenpNormListKS = [ ] # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance sEvenpNormListAD = [ ] # Anderson-Darling test for normal distribution unknown mean and variance for iteration in range(Iterations): Nlist, Slist, Evarlist, ESimplist, ENeelist, EHeiplist, EQlist = [ [], [], [], [], [], [], [] ] klist, Shanlist, BPlist, SimpDomlist, SinglesList, tenlist, onelist = [ [], [], [], [], [], [], [] ] NmaxList, rareSkews, KindList = [[], [], []] NSlist = [] ct = 0 radDATA = [] datasets = [] GoodNames = [ 'EMPclosed', 'HMP', 'BIGN', 'TARA', 'BOVINE', 'HUMAN', 'LAUB', 'SED', 'CHU', 'CHINA', 'CATLIN', 'FUNGI', 'HYDRO', 'BBS', 'CBC', 'MCDB', 'GENTRY', 'FIA' ] # all microbe data is MGRAST mlist = ['micro', 'macro'] for m in mlist: for name in os.listdir(mydir + 'data/' + m): if name in GoodNames: pass else: continue path = mydir + 'data/' + m + '/' + name + '/' + name + '-SADMetricData.txt' num_lines = sum(1 for line in open(path)) datasets.append([name, m, num_lines]) numMac = 0 numMic = 0 radDATA = [] for d in datasets: name, kind, numlines = d lines = [] lines = np.random.choice(range(1, numlines + 1), SampSize, replace=True) path = mydir + 'data/' + kind + '/' + name + '/' + name + '-SADMetricData.txt' for line in lines: data = linecache.getline(path, line) radDATA.append(data) #print name, kind, numlines, len(radDATA) for data in radDATA: data = data.split() if len(data) == 0: print 'no data' continue name, kind, N, S, Var, Evar, ESimp, EQ, O, ENee, EPielou, EHeip, BP, SimpDom, Nmax, McN, skew, logskew, chao1, ace, jknife1, jknife2, margalef, menhinick, preston_a, preston_S = data N = float(N) S = float(S) Nlist.append(float(np.log(N))) Slist.append(float(np.log(S))) NSlist.append(float(np.log(N / S))) Evarlist.append(float(np.log(float(Evar)))) ESimplist.append(float(np.log(float(ESimp)))) KindList.append(kind) BPlist.append(float(BP)) NmaxList.append(float(np.log(float(BP) * 
float(N)))) EHeiplist.append(float(EHeip)) # lines for the log-modulo transformation of skewnness skew = float(skew) sign = 1 if skew < 0: sign = -1 lms = np.log(np.abs(skew) + 1) lms = lms * sign #if lms > 3: print name, N, S rareSkews.append(float(lms)) if kind == 'macro': numMac += 1 elif kind == 'micro': numMic += 1 ct += 1 #print 'Sample Size:',SampSize, ' Mic:', numMic,'Mac:', numMac # Multiple regression for Rarity d = pd.DataFrame({'N': list(Nlist)}) d['Rarity'] = list(rareSkews) d['Kind'] = list(KindList) RarityResults = smf.ols( 'Rarity ~ N * Kind', d).fit() # Fit the dummy variable regression model #print RarityResults.summary(), '\n' # Multiple regression for Rarity d = pd.DataFrame({'N': list(Nlist)}) d['Richness'] = list(Slist) d['Kind'] = list(KindList) RichnessResults = smf.ols( 'Richness ~ N * Kind', d).fit() # Fit the dummy variable regression model #print RichnessResults.summary(), '\n' # Multiple regression for Dominance d = pd.DataFrame({'N': list(Nlist)}) d['Dominance'] = list(NmaxList) d['Kind'] = list(KindList) DomResults = smf.ols( 'Dominance ~ N * Kind', d).fit() # Fit the dummy variable regression model #print DomResults.summary(), '\n' # Multiple regression for Evenness d = pd.DataFrame({'N': list(Nlist)}) d['Evenness'] = list(ESimplist) d['Kind'] = list(KindList) EvenResults = smf.ols( 'Evenness ~ N * Kind', d).fit() # Fit the dummy variable regression model #print RarityResults.summary(), '\n' RareResids = RarityResults.resid # residuals of the model RichResids = RichnessResults.resid # residuals of the model DomResids = DomResults.resid # residuals of the model EvenResids = EvenResults.resid # residuals of the model # MODEL RESULTS/FIT RareFpval = RarityResults.f_pvalue Rarer2 = RarityResults.rsquared # coefficient of determination #Adj_r2 = RareResults.rsquared_adj # adjusted RichFpval = RichnessResults.f_pvalue Richr2 = RichnessResults.rsquared # coefficient of determination #Adj_r2 = RichnessResults.rsquared_adj # adjusted DomFpval 
= DomResults.f_pvalue Domr2 = DomResults.rsquared # coefficient of determination #Adj_r2 = DomResults.rsquared_adj # adjusted EvenFpval = EvenResults.f_pvalue Evenr2 = EvenResults.rsquared # coefficient of determination #Adj_r2 = EvenResuls.rsquared_adj # adjusted # MODEL PARAMETERS and p-values Rareparams = RarityResults.params Rareparams = Rareparams.tolist() Rarepvals = RarityResults.pvalues Rarepvals = Rarepvals.tolist() Richparams = RichnessResults.params Richparams = Richparams.tolist() Richpvals = RichnessResults.pvalues Richpvals = Richpvals.tolist() Domparams = DomResults.params Domparams = Domparams.tolist() Dompvals = DomResults.pvalues Dompvals = Dompvals.tolist() Evenparams = EvenResults.params Evenparams = Evenparams.tolist() Evenpvals = EvenResults.pvalues Evenpvals = Evenpvals.tolist() sRare_MacIntercept_pVals.append(Rarepvals[0]) sRare_MacIntercept_Coeffs.append(Rareparams[0]) sRich_MacIntercept_pVals.append(Rarepvals[0]) sRich_MacIntercept_Coeffs.append(Rareparams[0]) sDom_MacIntercept_pVals.append(Dompvals[0]) sDom_MacIntercept_Coeffs.append(Domparams[0]) sEven_MacIntercept_pVals.append(Evenpvals[0]) sEven_MacIntercept_Coeffs.append(Evenparams[0]) sRare_MicIntercept_pVals.append(Rarepvals[1]) if Rarepvals[1] > 0.05: sRare_MicIntercept_Coeffs.append(Rareparams[1]) else: sRare_MicIntercept_Coeffs.append(Rareparams[1]) sRich_MicIntercept_pVals.append(Richpvals[1]) if Richpvals[1] > 0.05: sRich_MicIntercept_Coeffs.append(Richparams[1]) else: sRich_MicIntercept_Coeffs.append(Richparams[1]) sDom_MicIntercept_pVals.append(Dompvals[1]) if Dompvals[1] > 0.05: sDom_MicIntercept_Coeffs.append(Domparams[1]) else: sDom_MicIntercept_Coeffs.append(Domparams[1]) sEven_MicIntercept_pVals.append(Evenpvals[1]) if Evenpvals[1] > 0.05: sEven_MicIntercept_Coeffs.append(Evenparams[1]) else: sEven_MicIntercept_Coeffs.append(Evenparams[1]) sRare_MacSlope_pVals.append(Rarepvals[2]) sRare_MacSlope_Coeffs.append(Rareparams[2]) sRich_MacSlope_pVals.append(Richpvals[2]) 
sRich_MacSlope_Coeffs.append(Richparams[2]) sDom_MacSlope_pVals.append(Dompvals[2]) sDom_MacSlope_Coeffs.append(Domparams[2]) sEven_MacSlope_pVals.append(Evenpvals[2]) sEven_MacSlope_Coeffs.append(Evenparams[2]) sRare_MicSlope_pVals.append(Rarepvals[3]) if Rarepvals[3] > 0.05: sRare_MicSlope_Coeffs.append(Rareparams[3]) else: sRare_MicSlope_Coeffs.append(Rareparams[3]) sRich_MicSlope_pVals.append(Richpvals[3]) if Richpvals[3] > 0.05: sRich_MicSlope_Coeffs.append(Richparams[3]) else: sRich_MicSlope_Coeffs.append(Richparams[3]) sDom_MicSlope_pVals.append(Dompvals[3]) if Dompvals[3] > 0.05: sDom_MicSlope_Coeffs.append(Domparams[3]) else: sDom_MicSlope_Coeffs.append(Domparams[3]) sEven_MicSlope_pVals.append(Evenpvals[3]) if Evenpvals[3] > 0.05: sEven_MicSlope_Coeffs.append(Evenparams[3]) else: sEven_MicSlope_Coeffs.append(Evenparams[3]) sRareR2List.append(Rarer2) sRarepFList.append(RareFpval) sRichR2List.append(Richr2) sRichpFList.append(RichFpval) sDomR2List.append(Domr2) sDompFList.append(DomFpval) sEvenR2List.append(Evenr2) sEvenpFList.append(EvenFpval) # TESTS OF LINEAR REGRESSION ASSUMPTIONS # Error in predictor variables is negligible...Presumably Yes # Variables are measured at the continuous level...Definitely Yes # TESTS FOR LINEARITY, i.e., WHETHER THE DATA ARE CORRECTLY MODELED AS LINEAR #HC = smd.linear_harvey_collier(RarityResults) # Harvey Collier test for linearity. The Null hypothesis is that the regression is correctly modeled as linear. #sRarepLinListHC.append(HC) #HC = smd.linear_harvey_collier(DomResults) # Harvey Collier test for linearity. The Null hypothesis is that the regression is correctly modeled as linear. #sDompLinListHC.append(HC) #HC = smd.linear_harvey_collier(EvenResults) # Harvey Collier test for linearity. The Null hypothesis is that the regression is correctly modeled as linear. #sEvenpLinListHC.append(HC) RB = smd.linear_rainbow( RarityResults ) # Rainbow test for linearity. 
The Null hypothesis is that the regression is correctly modeled as linear. sRarepLinListRainB.append(RB[1]) RB = smd.linear_rainbow( RichnessResults ) # Rainbow test for linearity. The Null hypothesis is that the regression is correctly modeled as linear. sRichpLinListRainB.append(RB[1]) RB = smd.linear_rainbow( DomResults ) # Rainbow test for linearity. The Null hypothesis is that the regression is correctly modeled as linear. sDompLinListRainB.append(RB[1]) RB = smd.linear_rainbow( EvenResults ) # Rainbow test for linearity. The Null hypothesis is that the regression is correctly modeled as linear. sEvenpLinListRainB.append(RB[1]) LM = smd.linear_lm(RarityResults.resid, RarityResults.model.exog ) # Lagrangian multiplier test for linearity sRarepLinListLM.append(LM[1]) LM = smd.linear_lm(RichnessResults.resid, RichnessResults.model.exog ) # Lagrangian multiplier test for linearity sRichpLinListLM.append(LM[1]) LM = smd.linear_lm(DomResults.resid, DomResults.model.exog ) # Lagrangian multiplier test for linearity sDompLinListLM.append(LM[1]) LM = smd.linear_lm(EvenResults.resid, EvenResults.model.exog ) # Lagrangian multiplier test for linearity sEvenpLinListLM.append(LM[1]) # INDEPENDENCE OF OBSERVATIONS (no serial correlation in residuals) BGtest = smd.acorr_breush_godfrey( RarityResults, nlags=None, store=False ) # Breusch Godfrey Lagrange Multiplier tests for residual autocorrelation # Lagrange multiplier test statistic, p-value for Lagrange multiplier test, fstatistic for F test, pvalue for F test #BGtest = smd.acorr_ljungbox(RareResids, lags=None, boxpierce=True) sRarepCorrListBG.append(BGtest[1]) sRarepCorrListF.append(BGtest[3]) BGtest = smd.acorr_breush_godfrey( RichnessResults, nlags=None, store=False ) # Breusch Godfrey Lagrange Multiplier tests for residual autocorrelation # Lagrange multiplier test statistic, p-value for Lagrange multiplier test, fstatistic for F test, pvalue for F test #BGtest = smd.acorr_ljungbox(RichResids, lags=None, 
boxpierce=True) sRichpCorrListBG.append(BGtest[1]) sRichpCorrListF.append(BGtest[3]) BGtest = smd.acorr_breush_godfrey( DomResults, nlags=None, store=False ) # Breusch Godfrey Lagrange Multiplier tests for residual autocorrelation # Lagrange multiplier test statistic, p-value for Lagrange multiplier test, fstatistic for F test, pvalue for F test #BGtest = smd.acorr_ljungbox(DomResids, lags=None, boxpierce=True) sDompCorrListBG.append(BGtest[1]) sDompCorrListF.append(BGtest[3]) BGtest = smd.acorr_breush_godfrey( EvenResults, nlags=None, store=False ) # Breusch Godfrey Lagrange Multiplier tests for residual autocorrelation # Lagrange multiplier test statistic, p-value for Lagrange multiplier test, fstatistic for F test, pvalue for F test #BGtest = smd.acorr_ljungbox(EvenResids, lags=None, boxpierce=True) sEvenpCorrListBG.append(BGtest[1]) sEvenpCorrListF.append(BGtest[3]) # There are no significant outliers...Need tests or measures/metrics # HOMOSCEDASTICITY # These tests return: # 1. lagrange multiplier statistic, # 2. p-value of lagrange multiplier test, # 3. f-statistic of the hypothesis that the error variance does not depend on x, # 4. p-value for the f-statistic HW = sms.het_white(RareResids, RarityResults.model.exog) sRarepHomoHW.append(HW[3]) HW = sms.het_white(RichResids, RichnessResults.model.exog) sRichpHomoHW.append(HW[3]) HW = sms.het_white(DomResids, DomResults.model.exog) sDompHomoHW.append(HW[3]) HW = sms.het_white(EvenResids, EvenResults.model.exog) sEvenpHomoHW.append(HW[3]) HB = sms.het_breushpagan(RareResids, RarityResults.model.exog) sRarepHomoHB.append(HB[3]) HB = sms.het_breushpagan(RichResids, RichnessResults.model.exog) sRichpHomoHB.append(HB[3]) HB = sms.het_breushpagan(DomResids, DomResults.model.exog) sDompHomoHB.append(HB[3]) HB = sms.het_breushpagan(EvenResids, EvenResults.model.exog) sEvenpHomoHB.append(HB[3]) # 7. 
NORMALITY OF ERROR TERMS O = sms.omni_normtest(RareResids) sRarepNormListOmni.append(O[1]) O = sms.omni_normtest(RichResids) sRichpNormListOmni.append(O[1]) O = sms.omni_normtest(DomResids) sDompNormListOmni.append(O[1]) O = sms.omni_normtest(EvenResids) sEvenpNormListOmni.append(O[1]) JB = sms.jarque_bera(RareResids) sRarepNormListJB.append( JB[1] ) # Calculate residual skewness, kurtosis, and do the JB test for normality JB = sms.jarque_bera(RichResids) sRichpNormListJB.append( JB[1] ) # Calculate residual skewness, kurtosis, and do the JB test for normality JB = sms.jarque_bera(DomResids) sDompNormListJB.append( JB[1] ) # Calculate residual skewness, kurtosis, and do the JB test for normality JB = sms.jarque_bera(EvenResids) sEvenpNormListJB.append( JB[1] ) # Calculate residual skewness, kurtosis, and do the JB test for normality KS = smd.kstest_normal(RareResids) sRarepNormListKS.append( KS[1] ) # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance KS = smd.kstest_normal(RichResids) sRichpNormListKS.append( KS[1] ) # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance KS = smd.kstest_normal(DomResids) sDompNormListKS.append( KS[1] ) # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance KS = smd.kstest_normal(EvenResids) sEvenpNormListKS.append( KS[1] ) # Lillifors test for normality, Kolmogorov Smirnov test with estimated mean and variance AD = smd.normal_ad(RareResids) sRarepNormListAD.append( AD[1] ) # Anderson-Darling test for normal distribution unknown mean and variance AD = smd.normal_ad(RichResids) sRichpNormListAD.append( AD[1] ) # Anderson-Darling test for normal distribution unknown mean and variance AD = smd.normal_ad(DomResids) sDompNormListAD.append( AD[1] ) # Anderson-Darling test for normal distribution unknown mean and variance AD = smd.normal_ad(EvenResids) sEvenpNormListAD.append( AD[1] ) # Anderson-Darling test for normal distribution 
unknown mean and variance print 'Sample size:', SampSize, 'iteration:', iteration NLIST.append(SampSize) Rare_MacIntercept_pVals.append(np.mean( sRare_MacIntercept_pVals)) # List to hold coefficient p-values Rare_MacIntercept_Coeffs.append( np.mean(sRare_MacIntercept_Coeffs)) # List to hold coefficients Rich_MacIntercept_pVals.append(np.mean( sRich_MacIntercept_pVals)) # List to hold coefficient p-values Rich_MacIntercept_Coeffs.append( np.mean(sRich_MacIntercept_Coeffs)) # List to hold coefficients Dom_MacIntercept_pVals.append(np.mean(sDom_MacIntercept_pVals)) Dom_MacIntercept_Coeffs.append(np.mean(sDom_MacIntercept_Coeffs)) Even_MacIntercept_pVals.append(np.mean(sEven_MacIntercept_pVals)) Even_MacIntercept_Coeffs.append(np.mean(sEven_MacIntercept_Coeffs)) Rare_MicIntercept_pVals.append(np.mean(sRare_MicIntercept_pVals)) Rare_MicIntercept_Coeffs.append(np.mean(sRare_MicIntercept_Coeffs)) Rich_MicIntercept_pVals.append(np.mean(sRich_MicIntercept_pVals)) Rich_MicIntercept_Coeffs.append(np.mean(sRich_MicIntercept_Coeffs)) Dom_MicIntercept_pVals.append(np.mean(sDom_MicIntercept_pVals)) Dom_MicIntercept_Coeffs.append(np.mean(sDom_MicIntercept_Coeffs)) Even_MicIntercept_pVals.append(np.mean(sEven_MicIntercept_pVals)) Even_MicIntercept_Coeffs.append(np.mean(sEven_MicIntercept_Coeffs)) Rare_MacSlope_pVals.append( np.mean(sRare_MacSlope_pVals)) # List to hold coefficient p-values Rare_MacSlope_Coeffs.append( np.mean(sRare_MacSlope_Coeffs)) # List to hold coefficients Rich_MacSlope_pVals.append( np.mean(sRich_MacSlope_pVals)) # List to hold coefficient p-values Rich_MacSlope_Coeffs.append( np.mean(sRich_MacSlope_Coeffs)) # List to hold coefficients Dom_MacSlope_pVals.append(np.mean(sDom_MacSlope_pVals)) Dom_MacSlope_Coeffs.append(np.mean(sDom_MacSlope_Coeffs)) Even_MacSlope_pVals.append(np.mean(sEven_MacSlope_pVals)) Even_MacSlope_Coeffs.append(np.mean(sEven_MacSlope_Coeffs)) Rare_MicSlope_pVals.append(np.mean(sRare_MicSlope_pVals)) 
Rare_MicSlope_Coeffs.append(np.mean(sRare_MicSlope_Coeffs)) Rich_MicSlope_pVals.append(np.mean(sRich_MicSlope_pVals)) Rich_MicSlope_Coeffs.append(np.mean(sRich_MicSlope_Coeffs)) Dom_MicSlope_pVals.append(np.mean(sDom_MicSlope_pVals)) Dom_MicSlope_Coeffs.append(np.mean(sDom_MicSlope_Coeffs)) Even_MicSlope_pVals.append(np.mean(sEven_MicSlope_pVals)) Even_MicSlope_Coeffs.append(np.mean(sEven_MicSlope_Coeffs)) RareR2List.append(np.mean(sRareR2List)) RarepFList.append(np.mean(sRarepFList)) RichR2List.append(np.mean(sRichR2List)) RichpFList.append(np.mean(sRichpFList)) DomR2List.append(np.mean(sDomR2List)) DompFList.append(np.mean(sDompFList)) EvenR2List.append(np.mean(sEvenR2List)) EvenpFList.append(np.mean(sEvenpFList)) # ASSUMPTIONS OF LINEAR REGRESSION # 1. Error in predictor variables is negligible...presumably yes # 2. Variables are measured at the continuous level...yes # 3. The relationship is linear #RarepLinListHC.append(np.mean(sRarepLinListHC)) RarepLinListRainB.append(np.mean(sRarepLinListRainB)) RarepLinListLM.append(np.mean(sRarepLinListLM)) #RichpLinListHC.append(np.mean(sRichpLinListHC)) RichpLinListRainB.append(np.mean(sRichpLinListRainB)) RichpLinListLM.append(np.mean(sRichpLinListLM)) #DompLinListHC.append(np.mean(sDompLinListHC)) DompLinListRainB.append(np.mean(sDompLinListRainB)) DompLinListLM.append(np.mean(sDompLinListLM)) #EvenpLinListHC.append(np.mean(sEvenpLinListHC)) EvenpLinListRainB.append(np.mean(sEvenpLinListRainB)) EvenpLinListLM.append(np.mean(sEvenpLinListLM)) # 4. There are no significant outliers...need to find tests or measures # 5. 
Independence of observations (no serial correlation in residuals) RarepCorrListBG.append(np.mean(sRarepCorrListBG)) RarepCorrListF.append(np.mean(sRarepCorrListF)) RichpCorrListBG.append(np.mean(sRichpCorrListBG)) RichpCorrListF.append(np.mean(sRichpCorrListF)) DompCorrListBG.append(np.mean(sDompCorrListBG)) DompCorrListF.append(np.mean(sDompCorrListF)) EvenpCorrListBG.append(np.mean(sEvenpCorrListBG)) EvenpCorrListF.append(np.mean(sEvenpCorrListF)) # 6. Homoscedacticity RarepHomoHW.append(np.mean(sRarepHomoHW)) RarepHomoHB.append(np.mean(sRarepHomoHB)) RichpHomoHB.append(np.mean(sRichpHomoHB)) RichpHomoHW.append(np.mean(sRichpHomoHW)) DompHomoHW.append(np.mean(sDompHomoHW)) DompHomoHB.append(np.mean(sDompHomoHB)) EvenpHomoHW.append(np.mean(sEvenpHomoHW)) EvenpHomoHB.append(np.mean(sEvenpHomoHB)) # 7. Normally distributed residuals (errors) RarepNormListOmni.append(np.mean(sRarepNormListOmni)) RarepNormListJB.append(np.mean(sRarepNormListJB)) RarepNormListKS.append(np.mean(sRarepNormListKS)) RarepNormListAD.append(np.mean(sRarepNormListAD)) RichpNormListOmni.append(np.mean(sRichpNormListOmni)) RichpNormListJB.append(np.mean(sRichpNormListJB)) RichpNormListKS.append(np.mean(sRichpNormListKS)) RichpNormListAD.append(np.mean(sRichpNormListAD)) DompNormListOmni.append(np.mean(sDompNormListOmni)) DompNormListJB.append(np.mean(sDompNormListJB)) DompNormListKS.append(np.mean(sDompNormListKS)) DompNormListAD.append(np.mean(sDompNormListAD)) EvenpNormListOmni.append(np.mean(sEvenpNormListOmni)) EvenpNormListJB.append(np.mean(sEvenpNormListJB)) EvenpNormListKS.append(np.mean(sEvenpNormListKS)) EvenpNormListAD.append(np.mean(sEvenpNormListAD)) fig.add_subplot(4, 3, 1) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) plt.ylim(0, 1) plt.xscale('log') # Rarity R2 vs. 
Sample Size plt.plot(NLIST, RareR2List, c='0.2', ls='--', lw=2, label=r'$R^2$') plt.ylabel(r'$R^2$', fontsize=14) plt.text(1.01, 0.6, 'Rarity', rotation='vertical', fontsize=16) leg = plt.legend(loc=4, prop={'size': 14}) leg.draw_frame(False) fig.add_subplot(4, 3, 2) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) plt.xscale('log') plt.ylim(0.0, 0.16) # Rarity Coeffs vs. Sample Size plt.plot(NLIST, Rare_MicSlope_Coeffs, c='r', lw=2, label='Microbe') plt.plot(NLIST, Rare_MacSlope_Coeffs, c='b', lw=2, label='Macrobe') #plt.plot(NLIST, RareIntCoeffList, c='g', label='Interaction') plt.ylabel('Coefficient') leg = plt.legend(loc=10, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 3) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) plt.ylim(0.0, 0.6) plt.xscale('log') # Rarity p-vals vs. Sample Size # 3. The relationship is linear #plt.plot(RarepLinListHC, NLIST, c='m', alpha=0.8) #plt.plot(NLIST,RarepLinListRainB, c='m') plt.plot(NLIST, RarepLinListLM, c='m', ls='-', label='linearity') # 5. Independence of observations (no serial correlation in residuals) #plt.plot(NLIST,RarepCorrListBG, c='c') plt.plot(NLIST, RarepCorrListF, c='c', ls='-', label='autocorrelation') # 6. Homoscedacticity plt.plot(NLIST, RarepHomoHW, c='orange', ls='-', label='homoscedasticity') #plt.plot(NLIST,RarepHomoHB, c='r', ls='-') # 7. Normally distributed residuals (errors) plt.plot(NLIST, RarepNormListOmni, c='Lime', ls='-', label='normality') #plt.plot(NLIST,RarepNormListJB, c='Lime', ls='-') #plt.plot(NLIST,RarepNormListKS, c='Lime', ls='--', lw=3) #plt.plot(NLIST,RarepNormListAD, c='Lime', ls='--') plt.plot([1, 100], [0.05, 0.05], c='0.2', ls='--') plt.ylabel('p-value') leg = plt.legend(loc=1, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 4) plt.xscale('log') plt.ylim(0, 1) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) # Dominance R2 vs. 
Sample Size plt.plot(NLIST, DomR2List, c='0.2', ls='--', lw=2, label=r'$R^2$') plt.ylabel(r'$R^2$', fontsize=14) plt.text(1.01, 0.82, 'Dominance', rotation='vertical', fontsize=16) leg = plt.legend(loc=4, prop={'size': 14}) leg.draw_frame(False) fig.add_subplot(4, 3, 5) plt.ylim(-0.2, 1.2) plt.xscale('log') plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) # Dominance Coeffs vs. Sample Size plt.plot(NLIST, Dom_MicSlope_Coeffs, c='r', lw=2, label='Microbe') plt.plot(NLIST, Dom_MacSlope_Coeffs, c='b', lw=2, label='Macrobe') #plt.plot(NLIST, DomIntCoeffList, c='g', label='Interaction') plt.ylabel('Coefficient') leg = plt.legend(loc=10, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 6) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) plt.xscale('log') #plt.yscale('log') plt.ylim(0, 0.6) # Dominance p-vals vs. Sample Size # 3. The relationship is linear #plt.plot(DompLinListHC, NLIST, c='m', alpha=0.8) #plt.plot(NLIST, DompLinListRainB, c='m') plt.plot(NLIST, DompLinListLM, c='m', ls='-', label='linearity') # 5. Independence of observations (no serial correlation in residuals) #plt.plot(NLIST, DompCorrListBG, c='c') plt.plot(NLIST, DompCorrListF, c='c', ls='-', label='autocorrelation') # 6. Homoscedacticity plt.plot(NLIST, DompHomoHW, c='orange', ls='-', label='homoscedasticity') #plt.plot(NLIST, DompHomoHB, c='r',ls='-') # 7. Normally distributed residuals (errors) plt.plot(NLIST, DompNormListOmni, c='Lime', ls='-', label='normality') #plt.plot(NLIST, DompNormListJB, c='Lime', ls='-') #plt.plot(NLIST, DompNormListKS, c='Lime', ls='--', lw=3) #plt.plot(NLIST, DompNormListAD, c='Lime', ls='--') plt.plot([1, 100], [0.05, 0.05], c='0.2', ls='--') plt.ylabel('p-value') leg = plt.legend(loc=1, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 7) plt.text(1.01, 0.7, 'Evenness', rotation='vertical', fontsize=16) plt.xscale('log') plt.ylim(0, 1) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) # Evenness R2 vs. 
Sample Size plt.plot(NLIST, EvenR2List, c='0.2', ls='--', lw=2, label=r'$R^2$') plt.ylabel(r'$R^2$', fontsize=14) leg = plt.legend(loc=4, prop={'size': 14}) leg.draw_frame(False) fig.add_subplot(4, 3, 8) plt.ylim(-0.25, 0.0) plt.xscale('log') plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) # Evenness Coeffs vs. Sample Size plt.plot(NLIST, Even_MicSlope_Coeffs, c='r', lw=2, label='Microbe') plt.plot(NLIST, Even_MacSlope_Coeffs, c='b', lw=2, label='Macrobe') #plt.plot(NLIST, EvenIntCoeffList, c='g', label='Interaction') plt.ylabel('Coefficient') leg = plt.legend(loc=10, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 9) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) plt.xscale('log') plt.ylim(0.0, 0.3) # Evenness p-vals vs. Sample Size # 3. The relationship is linear #plt.plot(EvenpLinListHC, NLIST, c='m', alpha=0.8) #plt.plot(NLIST, EvenpLinListRainB, c='m') plt.plot(NLIST, EvenpLinListLM, c='m', ls='-', label='linearity') # 5. Independence of observations (no serial correlation in residuals) #plt.plot(NLIST, EvenpCorrListBG, c='c') plt.plot(NLIST, EvenpCorrListF, c='c', ls='-', label='autocorrelation') # 6. Homoscedacticity plt.plot(NLIST, EvenpHomoHW, c='orange', ls='-', label='homoscedasticity') #plt.plot(NLIST, EvenpHomoHB, c='r', ls='-') # 7. Normally distributed residuals (errors) plt.plot(NLIST, EvenpNormListOmni, c='Lime', ls='-', label='normality') #plt.plot(NLIST, EvenpNormListJB, c='Lime', alpha=0.9, ls='-') #plt.plot(NLIST, EvenpNormListKS, c='Lime', alpha=0.9, ls='--', lw=3) #plt.plot(NLIST, EvenpNormListAD, c='Lime', alpha=0.9, ls='--') plt.plot([1, 100], [0.05, 0.05], c='0.2', ls='--') plt.ylabel('p-value') leg = plt.legend(loc=1, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 10) plt.xscale('log') plt.ylim(0, 1) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) # Dominance R2 vs. 
Sample Size plt.plot(NLIST, RichR2List, c='0.2', ls='--', lw=2, label=r'$R^2$') plt.ylabel(r'$R^2$', fontsize=14) plt.xlabel('Sample size', fontsize=14) plt.text(1.01, 0.82, 'Richness', rotation='vertical', fontsize=16) leg = plt.legend(loc=4, prop={'size': 14}) leg.draw_frame(False) fig.add_subplot(4, 3, 11) plt.ylim(-0.2, 1.2) plt.xscale('log') plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) # Richness Coeffs vs. Sample Size plt.plot(NLIST, Rich_MicSlope_Coeffs, c='r', lw=2, label='Microbe') plt.plot(NLIST, Rich_MacSlope_Coeffs, c='b', lw=2, label='Macrobe') #plt.plot(NLIST, RichIntCoeffList, c='g', label='Interaction') plt.ylabel('Coefficient') plt.xlabel('Sample size', fontsize=14) leg = plt.legend(loc=10, prop={'size': 8}) leg.draw_frame(False) fig.add_subplot(4, 3, 12) plt.xlim(min(SampSizes) - 1, max(SampSizes) + 10) plt.xscale('log') # Richness p-vals vs. Sample Size # 3. The relationship is linear #plt.plot(RichpLinListHC, NLIST, c='m', alpha=0.8) #plt.plot(NLIST,RichpLinListRainB, c='m') plt.plot(NLIST, RichpLinListLM, c='m', ls='-', label='linearity') # 5. Independence of observations (no serial correlation in residuals) #plt.plot(NLIST,RichpCorrListBG, c='c') plt.plot(NLIST, EvenpCorrListF, c='c', ls='-', label='autocorrelation') # 6. Homoscedacticity plt.plot(NLIST, RichpHomoHW, c='orange', ls='-', label='homoscedasticity') #plt.plot(NLIST,RichpHomoHB, c='r', ls='-') # 7. 
Normally distributed residuals (errors) plt.plot(NLIST, RichpNormListOmni, c='Lime', ls='-', label='normality') #plt.plot(NLIST,RichpNormListJB, c='Lime', ls='-') #plt.plot(NLIST,RichpNormListKS, c='Lime', ls='--', lw=3) #plt.plot(NLIST,RichpNormListAD, c='Lime', ls='--') plt.plot([1, 100], [0.05, 0.05], c='0.2', ls='--') plt.ylabel('p-value') plt.xlabel('Sample size', fontsize=14) leg = plt.legend(loc=1, prop={'size': 8}) leg.draw_frame(False) #plt.tick_params(axis='both', which='major', labelsize=fs-3) plt.subplots_adjust(wspace=0.4, hspace=0.4) plt.savefig(mydir + 'figs/appendix/SampleSize/SampleSizeEffects.png', dpi=600, bbox_inches="tight") #plt.close() #plt.show() return