def testADFTest():
    import statsmodels.tsa.stattools as sts
    import statsmodels.stats.stattools as sss
    import numpy as np

    data = np.random.randn(100)

    # http://statsmodels.sourceforge.net/stable/generated/statsmodels.tsa.stattools.adfuller.html
    print sts.adfuller(data)

    # http://statsmodels.sourceforge.net/stable/generated/statsmodels.stats.stattools.jarque_bera.html
    print sss.jarque_bera(data)
def test_jarque_bera():
    # tests against R fBasics
    st_pv_R = np.array([1.9662677226861689, 0.3741367669648314])
    jb = jarque_bera(x)[:2]
    assert_almost_equal(jb, st_pv_R, 14)

    st_pv_R = np.array([78.329987305556, 0.000000000000])
    jb = jarque_bera(x**2)[:2]
    assert_almost_equal(jb, st_pv_R, 13)

    st_pv_R = np.array([5.7135750796706670, 0.0574530296971343])
    jb = jarque_bera(np.log(x**2))[:2]
    assert_almost_equal(jb, st_pv_R, 14)

    st_pv_R = np.array([2.6489315748495761, 0.2659449923067881])
    jb = jarque_bera(np.exp(-x**2))[:2]
    assert_almost_equal(jb, st_pv_R, 14)
def individualSymbolStats():
    '''statistical analysis of individual stocks'''
    symbols = [
        '2330', '2412', '2882', '6505', '2317',
        '2303', '2002', '1303', '1326', '1301',
        '2881', '2886', '2409', '2891', '2357',
        '2382', '3045', '2883', '2454', '2880',
        '2892', '4904', '2887', '2353', '2324',
        '2801', '1402', '2311', '2475', '2888',
        '2408', '2308', '2301', '2352', '2603',
        '2884', '2890', '2609', '9904', '2610',
        '1216', '1101', '2325', '2344', '2323',
        '2371', '2204', '1605', '2615', '2201',
    ]

    startDate = date(2005, 1, 3)
    endDate = date(2013, 12, 31)

    statIO = StringIO()
    statIO.write('rank & symbol & $R_{C}$(\%) & $R_{A}$(\%) & $\mu$(\%) & $\sigma$(\%) & skew & kurt & $S_p$(\%) & $S_o$(\%) & JB & ADF \\\ \hline \n')

    for idx, symbol in enumerate(symbols):
        df = pd.read_pickle(os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol))
        tmp = df[startDate:endDate]
        rois = tmp['adjROI'].values

        mean = rois.mean()
        std = rois.std()
        skew = spstats.skew(rois)
        kurt = spstats.kurtosis(rois)
        sharpe = Performance.Sharpe(rois)
        sortinof, dd = Performance.SortinoFull(rois)
        # sortinop = Performance.SortinoPartial(rois)

        ret = sss.jarque_bera(rois)
        JB = ret[1]

        ret2 = sts.adfuller(rois)
        ADF = ret2[1]

        rtmp = rois / 100 + 1
        rtmp[1] -= 0.001425    # buy fee
        rtmp[-1] -= 0.004425   # sell fee
        R_cum = rtmp[1:].prod() - 1
        AR_cum = np.power((R_cum + 1), 1. / 9) - 1

        # rank & symbol & $R_{C}$ & $R_{A}$ & $\mu$ & $\sigma$ & skew & kurt & JB & ADF & $S_p$ & $S_o$
        statIO.write('%2d & %s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n' % (
            idx + 1, symbol, R_cum * 100, AR_cum * 100, mean, std,
            skew, kurt, sharpe * 100, sortinof * 100, JB, ADF))
        print symbol, R_cum, AR_cum

    resFile = os.path.join(ExpResultsDir, 'symbol_daily_stats.txt')
    with open(resFile, 'wb') as fout:
        fout.write(statIO.getvalue())
    statIO.close()
def comparisonStats():
    symbols = ['TAIEX', '0050']

    startDate = date(2005, 1, 3)
    endDate = date(2013, 12, 31)

    statIO = StringIO()
    statIO.write('symbol & $R_{C}$(\%) & $R_{A}$(\%) & ')
    statIO.write('$\mu$(\%) & $\sigma$(\%) & skew & kurt & ')
    statIO.write('$S_p$(\%) & $S_o$(\%) & JB & ADF \\\ \hline \n')

    for idx, symbol in enumerate(symbols):
        df = pd.read_pickle(os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol))
        print symbol, df.columns
        tmp = df[startDate:endDate]
        rois = tmp['adjROI'].values

        mean = rois.mean()
        std = rois.std()
        skew = spstats.skew(rois)
        kurt = spstats.kurtosis(rois)
        sharpe = Performance.Sharpe(rois)
        sortinof, dd = Performance.SortinoFull(rois)
        print rois
        # k2, pval = spstats.normaltest(rois)

        ret = sss.jarque_bera(rois)
        JB = ret[1]

        ret2 = sts.adfuller(rois)
        ADF = ret2[1]

        rtmp = rois / 100 + 1
        rtmp[1] -= 0.001425    # buy fee
        rtmp[-1] -= 0.004425   # sell fee
        R_cum = rtmp[1:].prod() - 1
        AR_cum = np.power((R_cum + 1), 1. / 9) - 1

        statIO.write(' %s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n' % (
            symbol, R_cum * 100, AR_cum * 100, mean, std,
            skew, kurt, sharpe * 100, sortinof * 100, JB, ADF))
        print symbol, R_cum, AR_cum

    resFile = os.path.join(ExpResultsDir, 'comparison_daily_stats.txt')
    with open(resFile, 'wb') as fout:
        fout.write(statIO.getvalue())
    statIO.close()
def example2():
    X = bimodal(1000)

    # Let's see how it looks
    plt.hist(X, bins=50)
    plt.ylabel('Frequency')
    plt.xlabel('Value')
    plt.title('Actual distribution')
    plt.show()

    print('mean:', np.mean(X))
    print('standard deviation:', np.std(X))

    mu = np.mean(X)
    sigma = np.std(X)
    N = np.random.normal(mu, sigma, 1000)
    plt.hist(N, bins=50)
    plt.ylabel('Frequency')
    plt.xlabel('Value')
    plt.title('Sample normal distribution')

    from statsmodels.stats.stattools import jarque_bera
    # Examine whether the data are normally distributed using the
    # Jarque-Bera normality test
    jarque_bera(X)
    plt.show()
    return
def sample_stats(self, pred, tag):
    """Get statistics about the samples matching predicate `pred`."""
    all_samples = [(k, v['raw']) for (k, v) in self.stats_by_config.items() if pred(k)]
    configs = [k for (k, _) in all_samples]
    vals = [v for (k, vs) in all_samples for v in vs]
    stat = util.stats_of_row(vals)
    return [tag
            ,len(set(configs))
            ,len(vals)
            ,round(stat["mean"], 2)
            ,round(stat["variance"], 2)
            ,"%s~\\textendash~%s" % (round(stat["ci"][0], 2), round(stat["ci"][1], 2))
            ,round(math.sqrt(stat["variance"]) / math.sqrt(len(vals)), 2)
            ,round(jarque_bera(vals)[0], 2)]
def y2yBuyHold():
    t = time.time()
    n_rvs = range(5, 50 + 5, 5)
    years = range(2005, 2013 + 1)
    resultDir = os.path.join(ExpResultsDir, "BuyandHoldPortfolio")

    avgIO = StringIO()
    avgIO.write('startDate, endDate, n_stock, wealth1, wealth2, wROI(%), JB, ADF,')
    avgIO.write('meanROI(%%), Sharpe(%%), SortinoFull(%%), SortinoPartial(%%),')
    avgIO.write(' downDevFull, downDevPartial\n')

    for n_rv in n_rvs:
        df = pd.read_pickle(os.path.join(resultDir, "wealthSum_n%s.pkl" % (n_rv)))

        for year in years:
            startDate = date(year, 1, 1)
            endDate = date(year, 12, 31)
            print startDate, endDate

            wealths = df[startDate:endDate]
            wrois = wealths.pct_change()
            wrois[0] = 0

            wealth1 = wealths[0]
            wealth2 = wealths[-1] * (1 - 0.004425)
            roi = (wealth2 / wealth1 - 1)

            ret = sss.jarque_bera(wrois)
            JB = ret[1]

            ret2 = sts.adfuller(wrois)
            ADF = ret2[1]

            sharpe = Performance.Sharpe(wrois)
            sortinof, ddf = Performance.SortinoFull(wrois)
            sortinop, ddp = Performance.SortinoPartial(wrois)

            avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," % (
                wealths.index[0].strftime("%Y-%m-%d"),
                wealths.index[-1].strftime("%Y-%m-%d"),
                n_rv, wealth1, wealth2, roi * 100, JB, ADF))
            avgIO.write("%s,%s,%s,%s," % (wrois.mean() * 100, sharpe * 100,
                                          sortinof * 100, sortinop * 100))
            avgIO.write("%s,%s\n" % (ddf * 100, ddp * 100))

    resFile = os.path.join(ExpResultsDir, 'y2yfixedBuyandHold_result_2005.csv')
    with open(resFile, 'wb') as fout:
        fout.write(avgIO.getvalue())
    avgIO.close()

    print "y2yBuyandHold OK, elapsed %.3f secs" % (time.time() - t)
def _ROIstats(rois):
    mu = rois.mean()
    stdev = rois.std()
    skew = spstats.skew(rois)
    kurt = spstats.kurtosis(rois)
    sharpe = Performance.Sharpe(rois)
    sortinof, ddf = Performance.SortinoFull(rois)
    sortinop, ddp = Performance.SortinoPartial(rois)

    ret = sss.jarque_bera(rois)
    JB = ret[1]

    ret2 = sts.adfuller(rois)
    ADF = ret2[1]

    return {"mu": mu, "stdev": stdev, "skew": skew, "kurt": kurt,
            "sharpe": sharpe, "sortinof": sortinof, "sortinop": sortinop,
            "ddf": ddf, "ddp": ddp}
def datestr2num(s):
    return datetime.strptime(s, "%Y-%m-%d").date().toordinal()

def get_close(symbol):
    # load the stock quotes from a CSV file
    quotes = np.loadtxt(symbol, delimiter=',', converters={0: datestr2num}, unpack=False)
    print "quotes: ", quotes[:5]
    quotes = quotes[::-1]
    quotes = np.array(quotes)
    return quotes.T[4]

zx = np.diff(np.log(get_close('../chapter4/datazx.csv')))
zx = zx[:550]
ht = np.diff(np.log(get_close('../chapter4/dataht.csv')))
ht = ht[:550]

print "Means comparison", stats.ttest_ind(zx, ht)
print "Kolmogorov smirnov test", stats.ks_2samp(zx, ht)
print "Jarque Bera test", jarque_bera(zx - ht)[1]

plt.hist(zx, histtype="step", lw=1, label="zx")
plt.hist(ht, histtype="step", lw=2, label="ht")
plt.hist(zx - ht, histtype="step", lw=3, label="Delta")
plt.legend()
plt.show()
def parseSymbolResults(modelType="fixed"):
    '''whole period'''
    if modelType == "fixed":
        n_rvs = range(5, 55, 5)
        hist_periods = range(50, 130, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75",
                  "0.8", "0.85", "0.9", "0.95", '0.99')
        myDir = os.path.join(ExpResultsDir, "fixedSymbolSPPortfolio",
                             "LargestMarketValue_200501")
    elif modelType == "dynamic":
        n_rvs = range(5, 55, 5)
        hist_periods = range(90, 120 + 10, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7")
        myDir = os.path.join(ExpResultsDir, "dynamicSymbolSPPortfolio",
                             "LargestMarketValue_200501_rv50")

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        avgIO.write('run, n_rv, period, alpha, time, wealth, wealth-std, wROI(%), wROI-std,')
        avgIO.write('dROI(%%), stdev, skew, kurt, Sp(%%), Sp-std, StF(%%), StF-std,')
        avgIO.write('StP(%%), Stp-std, downDevF, downDevP, JB, ADF, CVaRfailRate, VaRfailRate, scen err\n')

        for period in hist_periods:
            if n_rv == 50 and period == 50:
                continue

            for alpha in alphas:
                if modelType == "fixed":
                    dirName = "fixedSymbolSPPortfolio_n%s_p%s_s200_a%s" % (n_rv, period, alpha)
                elif modelType == "dynamic":
                    dirName = "dynamicSymbolSPPortfolio_n%s_p%s_s200_a%s" % (n_rv, period, alpha)

                exps = glob(os.path.join(myDir, dirName, "20050103-20131231_*"))

                wealths, ROI_Cs, dROIs, stdevs, skews, kurts = [], [], [], [], [], []
                JBs, ADFs = [], []
                sharpes, sortinofs, sortinops, downDevF, downDevP = [], [], [], [], []
                CVaRFailRates, VaRFailRates = [], []
                elapsed, scenerr = [], []

                if len(exps) > 3:
                    exps = exps[:3]

                if len(exps) == 0:
                    avgIO.write('NA,' * 26 + '\n')
                    continue

                for edx, exp in enumerate(exps):
                    print exp
                    summaryFile = os.path.join(exp, "summary.json")
                    summary = json.load(open(summaryFile))
                    print dirName

                    # wealth and cum ROI
                    wealth = float(summary['final_wealth'])
                    wealths.append(wealth)
                    ROI_Cs.append((wealth / 1e6 - 1) * 100.0)

                    elapsed.append(float(summary['elapsed']))
                    scenerr.append(summary['scen_err_cnt'])

                    try:
                        dROIs.append(float(summary['wealth_ROI_mean']) * 100)
                        stdevs.append(float(summary['wealth_ROI_stdev']) * 100)
                        skews.append(float(summary['wealth_ROI_skew']))
                        kurts.append(float(summary['wealth_ROI_kurt']))
                        sharpes.append(float(summary['wealth_ROI_Sharpe']) * 100)
                        sortinofs.append(float(summary['wealth_ROI_SortinoFull']) * 100)
                        sortinops.append(float(summary['wealth_ROI_SortinoPartial']) * 100)
                        downDevF.append((float(summary['wealth_ROI_downDevFull'])) * 100)
                        downDevP.append((float(summary['wealth_ROI_downDevPartial'])) * 100)
                        JBs.append(float(summary['wealth_ROI_JBTest']))
                        ADFs.append(float(summary['wealth_ROI_ADFTest']))
                    except (KeyError, TypeError):
                        # read wealth process
                        print "read raw df n_rv-period-alpha: %s-%s-%s:%s" % (n_rv, period, alpha, edx + 1)
                        df = pd.read_pickle(os.path.join(exp, 'wealthProcess.pkl'))
                        proc = df.sum(axis=1)
                        wrois = proc.pct_change()
                        wrois[0] = 0

                        dROI = wrois.mean()
                        dROIs.append(dROI * 100)
                        summary['wealth_ROI_mean'] = dROI

                        stdev = wrois.std()
                        stdevs.append(stdev)
                        summary['wealth_ROI_stdev'] = stdev

                        skew = spstats.skew(wrois)
                        skews.append(skew)
                        summary['wealth_ROI_skew'] = skew

                        kurt = spstats.kurtosis(wrois)
                        kurts.append(kurt)
                        summary['wealth_ROI_kurt'] = kurt

                        sharpe = Performance.Sharpe(wrois)
                        sharpes.append(sharpe * 100)
                        summary['wealth_ROI_Sharpe'] = sharpe

                        sortinof, ddf = Performance.SortinoFull(wrois)
                        sortinofs.append(sortinof * 100)
                        downDevF.append(ddf * 100)
                        summary['wealth_ROI_SortinoFull'] = sortinof
                        summary['wealth_ROI_downDevFull'] = ddf

                        sortinop, ddp = Performance.SortinoPartial(wrois)
                        sortinops.append(sortinop * 100)
                        downDevP.append(ddp * 100)
                        summary['wealth_ROI_SortinoPartial'] = sortinop
                        summary['wealth_ROI_downDevPartial'] = ddp

                        ret = sss.jarque_bera(wrois)
                        JB = ret[1]
                        JBs.append(JB)
                        summary['wealth_ROI_JBTest'] = JB

                        ret2 = sts.adfuller(wrois)
                        ADF = ret2[1]
                        ADFs.append(ADF)
                        summary['wealth_ROI_ADFTest'] = ADF

                        fileName = os.path.join(exp, 'summary.json')
                        with open(fileName, 'w') as fout:
                            json.dump(summary, fout, indent=4)

                    try:
                        CVaRFailRate = float(summary['CVaR_failRate'] * 100)
                        VaRFailRate = float(summary['VaR_failRate'] * 100)
                        CVaRFailRates.append(CVaRFailRate)
                        VaRFailRates.append(VaRFailRate)
                    except (KeyError, TypeError):
                        wealth_df = pd.read_pickle(os.path.join(exp, 'wealthProcess.pkl'))
                        risk_df = pd.read_pickle(os.path.join(exp, 'riskProcess.pkl'))
                        CVaRFailRate, VaRFailRate = VaRBackTest(wealth_df, risk_df)
                        CVaRFailRates.append(CVaRFailRate * 100)
                        VaRFailRates.append(VaRFailRate * 100)
                        summary['VaR_failRate'] = VaRFailRate
                        summary['CVaR_failRate'] = CVaRFailRate
                        print "CVaR fail:%s, VaR fail:%s" % (CVaRFailRate, VaRFailRate)

                        fileName = os.path.join(exp, 'summary.json')
                        with open(fileName, 'w') as fout:
                            json.dump(summary, fout, indent=4)

                wealths = np.asarray(wealths)
                ROI_Cs = np.asarray(ROI_Cs)
                dROIs = np.asarray(dROIs)
                stdevs = np.asarray(stdevs)
                skews = np.asarray(skews)
                kurts = np.asarray(kurts)
                JBs = np.asarray(JBs)
                ADFs = np.asarray(ADFs)
                sharpes = np.asarray(sharpes)
                sortinofs = np.asarray(sortinofs)
                sortinops = np.asarray(sortinops)
                downDevF = np.asarray(downDevF)
                downDevP = np.asarray(downDevP)
                CVaRFailRates = np.asarray(CVaRFailRates)
                VaRFailRates = np.asarray(VaRFailRates)
                elapsed = np.asarray(elapsed)
                scenerr = np.asarray(scenerr)

                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s," % (
                    len(ROI_Cs), n_rv, period, alpha, elapsed.mean(),
                    wealths.mean(), wealths.std(), ROI_Cs.mean(), ROI_Cs.std()))
                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," % (
                    dROIs.mean(), stdevs.mean(), skews.mean(), kurts.mean(),
                    sharpes.mean(), sharpes.std(), sortinofs.mean(), sortinofs.std()))
                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (
                    sortinops.mean(), sortinops.std(), downDevF.mean(), downDevP.mean(),
                    max(JBs), max(ADFs), CVaRFailRates.mean(), VaRFailRates.mean(),
                    scenerr.mean()))

        if modelType == "fixed":
            resFile = os.path.join(ExpResultsDir, 'avg_fixedSymbolSPPortfolio_n%s_result_2005.csv' % (n_rv))
        elif modelType == "dynamic":
            resFile = os.path.join(ExpResultsDir, 'avg_dynamicSymbolSPPortfolio_n%s_result_2005.csv' % (n_rv))

        with open(resFile, 'wb') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()

        print "n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t)
def y2yResults(modelType="fixed"):
    ''' '''
    global ExpResultsDir

    if modelType == "fixed":
        n_rvs = range(5, 55, 5)
        hist_periods = range(50, 130, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75",
                  "0.8", "0.85", "0.9", "0.95")
        myDir = os.path.join(ExpResultsDir, "fixedSymbolSPPortfolio",
                             "LargestMarketValue_200501")
    elif modelType == "dynamic":
        n_rvs = range(5, 55, 5)
        hist_periods = range(90, 120 + 10, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7")
        myDir = os.path.join(ExpResultsDir, "dynamicSymbolSPPortfolio",
                             "LargestMarketValue_200501_rv50")

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        avgIO.write('run, startDate, endDate, n_rv, period, alpha, w1, w1-std, w2, w2-std, wROI(%), wROI-std,')
        avgIO.write('dROI(%%), stdev, skew, kurt, Sp(%%), Sp-std, StF(%%), StF-std,')
        avgIO.write('StP(%%), Stp-std, downDevF, downDevP, JB, ADF, CVaRfailRate, VaRfailRate, scen err\n')

        for period in hist_periods:
            if n_rv == 50 and period == 50:
                continue

            for alpha in alphas:
                if modelType == "fixed":
                    dirName = "fixedSymbolSPPortfolio_n%s_p%s_s200_a%s" % (n_rv, period, alpha)
                elif modelType == "dynamic":
                    dirName = "dynamicSymbolSPPortfolio_n%s_p%s_s200_a%s" % (n_rv, period, alpha)

                exps = glob(os.path.join(myDir, dirName, "20050103-20131231_*"))
                if len(exps) > 3:
                    exps = exps[:3]

                years = range(2005, 2013 + 1)
                d1, d2 = len(exps), len(years)

                wealth1, wealth2, ROI_Cs = np.zeros((d1, d2)), np.zeros((d1, d2)), np.zeros((d1, d2))
                dROIs, stdevs, skews, kurts = (np.zeros((d1, d2)), np.zeros((d1, d2)),
                                               np.zeros((d1, d2)), np.zeros((d1, d2)))
                JBs, ADFs = np.zeros((d1, d2)), np.zeros((d1, d2))
                sharpes = np.zeros((d1, d2))
                sortinops, downDevP = np.zeros((d1, d2)), np.zeros((d1, d2))
                sortinofs, downDevF = np.zeros((d1, d2)), np.zeros((d1, d2))
                CVaRFailRates, VaRFailRates = np.zeros((d1, d2)), np.zeros((d1, d2))

                for edx, exp in enumerate(exps):
                    wealth_df = pd.read_pickle(os.path.join(exp, 'wealthProcess.pkl'))
                    risk_df = pd.read_pickle(os.path.join(exp, 'riskProcess.pkl'))

                    for ydx, year in enumerate(years):
                        startDate = date(year, 1, 1)
                        endDate = date(year, 12, 31)
                        exp_wealth_df = wealth_df[startDate:endDate]
                        exp_risk_df = risk_df[startDate:endDate]

                        # wealth
                        wealth = exp_wealth_df.sum(axis=1)
                        wealth[-1] *= (1 - 0.004425)
                        wealth1[edx, ydx] = wealth[0]
                        wealth2[edx, ydx] = wealth[-1]

                        # cum ROI
                        roi = (wealth[-1] / wealth[0] - 1)
                        wrois = wealth.pct_change()
                        wrois[0] = 0
                        ROI_Cs[edx, ydx] = roi * 100

                        # stats
                        dROIs[edx, ydx] = wrois.mean() * 100
                        stdevs[edx, ydx] = wrois.std() * 100
                        skews[edx, ydx] = spstats.skew(wrois)
                        kurts[edx, ydx] = spstats.kurtosis(wrois)

                        # JB, ADF
                        ret = sss.jarque_bera(wrois)
                        JB = ret[1]
                        ret2 = sts.adfuller(wrois)
                        ADF = ret2[1]
                        JBs[edx, ydx] = JB
                        ADFs[edx, ydx] = ADF

                        # Sharpe
                        sharpe = Performance.Sharpe(wrois)
                        sharpes[edx, ydx] = sharpe * 100

                        sortinof, ddf = Performance.SortinoFull(wrois)
                        sortinofs[edx, ydx] = sortinof * 100
                        downDevF[edx, ydx] = ddf * 100

                        sortinop, ddp = Performance.SortinoPartial(wrois)
                        sortinops[edx, ydx] = sortinop * 100
                        downDevP[edx, ydx] = ddp * 100

                        CVaRFailRate, VaRFailRate = VaRBackTest(exp_wealth_df, exp_risk_df)
                        CVaRFailRates[edx, ydx] = CVaRFailRate * 100
                        VaRFailRates[edx, ydx] = VaRFailRate * 100

                for ydx, year in enumerate(years):
                    startDate = date(year, 1, 1)
                    endDate = date(year, 12, 31)
                    exp_df = wealth_df[startDate:endDate]

                    # avgIO.write('run, startDate, endDate, n_rv, period, alpha, w1, w1-std, w2, w2-std, wROI(%), wROI-std,')
                    # avgIO.write('dROI(%%), stdev, skew, kurt, Sp(%%), Sp-std, StF(%%), StF-std,')
                    # avgIO.write('StP(%%), Stp-std, downDevF, downDevP, JB, ADF, CVaRfailRate, VaRfailRate\n')
                    avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" % (
                        len(exps),
                        exp_df.index[0].strftime("%Y-%m-%d"),
                        exp_df.index[-1].strftime("%Y-%m-%d"),
                        n_rv, period, alpha,
                        wealth1[:, ydx].mean(), wealth1[:, ydx].std(),
                        wealth2[:, ydx].mean(), wealth2[:, ydx].std(),
                        ROI_Cs[:, ydx].mean(), ROI_Cs[:, ydx].std(),
                    ))
                    avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," % (
                        dROIs[:, ydx].mean(), stdevs[:, ydx].mean(),
                        skews[:, ydx].mean(), kurts[:, ydx].mean(),
                        sharpes[:, ydx].mean(), sharpes[:, ydx].std(),
                        sortinofs[:, ydx].mean(), sortinofs[:, ydx].std()))
                    avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s\n" % (
                        sortinops[:, ydx].mean(), sortinops[:, ydx].std(),
                        downDevF[:, ydx].mean(), downDevP[:, ydx].mean(),
                        max(JBs[:, ydx]), max(ADFs[:, ydx]),
                        CVaRFailRates[:, ydx].mean(), VaRFailRates[:, ydx].mean()))

                    print "n_rv:%s p:%s a:%s endDate:%s run:%s" % (n_rv, period, alpha, endDate, edx + 1)

        if modelType == "fixed":
            resFile = os.path.join(ExpResultsDir, 'avg_y2yfixedSymbolSPPortfolio_n%s_result_2005.csv' % (n_rv))
        elif modelType == "dynamic":
            resFile = os.path.join(ExpResultsDir, 'avg_y2ydynamicSymbolSPPortfolio_n%s_result_2005.csv' % (n_rv))

        with open(resFile, 'ab') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()

        print "n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t)
end_date='2015-01-01')

# This will give us the number of dollars returned each day
absolute_returns = np.diff(prices)

# This will give us the percentage return over the last day's value
# the [:-1] notation gives us all but the last item in the array
# We do this because there are no returns on the final price in the array.
returns = absolute_returns / prices[:-1]

# Let's use `scipy`'s fit function to get the $\mu$ and $\sigma$ MLEs.

# In[43]:

mu, std = scipy.stats.norm.fit(returns)
pdf = scipy.stats.norm.pdf
x = np.linspace(-1, 1, num=100)
h = plt.hist(returns, bins=x, normed='true')
l = plt.plot(x, pdf(x, loc=mu, scale=std))

# Of course, this fit is meaningless unless we've tested that the returns obey a
# normal distribution first. We can test this using the Jarque-Bera normality test.
# The Jarque-Bera test rejects the hypothesis of a normal distribution if the
# p-value is under our chosen cutoff.

# In[45]:

from statsmodels.stats.stattools import jarque_bera

jarque_bera(returns)

# In[46]:

jarque_bera(np.random.normal(0, 1, 100))
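# A minimal sketch (an addition, not part of the original notebook) of how the four
# values returned by jarque_bera are typically unpacked and compared against a
# significance level; a synthetic array stands in for the `returns` series above,
# and the 0.05 cutoff is an illustrative assumption.
import numpy as np
from statsmodels.stats.stattools import jarque_bera

simulated_returns = np.random.normal(0, 0.01, 250)  # stand-in for `returns`
stat, pvalue, skewness, kurtosis = jarque_bera(simulated_returns)

cutoff = 0.05  # illustrative significance level
if pvalue < cutoff:
    print('Reject normality: JB=%.3f, p=%.4f' % (stat, pvalue))
else:
    print('Cannot reject normality: JB=%.3f, p=%.4f' % (stat, pvalue))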
def predict_arma(ad_group, pred_date):
    warnings.filterwarnings("ignore")
    ads_file = 'data/ad_table.csv'
    df = pd.read_csv(ads_file, header=0, sep=',')
    df['date'] = pd.to_datetime(df['date'], infer_datetime_format=True)

    best_aic = np.inf
    best_order = None
    best_mdl = None
    max_lag = 30
    tuning_result = {}
    # list_ad_group = set(df['ad'].values)

    if (ad_group in df['ad'].unique()):
        df_ad_group_train = df[df['ad'] == ad_group]
        df_ad_group_train = df_ad_group_train.reset_index()
        df_arma_train = df_ad_group_train[['shown', 'date']]
        series_train = pd.Series(df_arma_train['shown'], index=df_arma_train.index)

        for alpha in range(5):
            for beta in range(5):
                try:
                    tmp_mdl = ARMA(series_train.values, order=(alpha, beta)).fit(method='mle', trend='nc')
                    tmp_aic = tmp_mdl.aic
                    if tmp_aic < best_aic:
                        best_aic = tmp_aic
                        best_order = (alpha, beta)
                        best_mdl = tmp_mdl
                except:
                    continue

        score, pvalue, _, _ = jarque_bera(best_mdl.resid)
        if pvalue < 0.10:
            print('The residuals may not be normally distributed.')
        else:
            print('The residuals seem normally distributed.')

        tuning_result = (best_aic, best_order)
        print('Ad_group: {} aic: {:6.2f} | best order: {}'.format(
            ad_group, best_aic, best_order))

        df_ad_group_train['time_period'] = (
            df_ad_group_train['date'] - df_ad_group_train['date'][0]).dt.days
        X = df_ad_group_train[['time_period']].values
        y = df_ad_group_train['shown'].values

        series_train.plot(title='Shown values trend', color='C1')
        plt.ylabel('shown values')
        plt.xlabel('Days gap from 2015-10-01')
        plt.scatter(X, y, facecolor='gray', edgecolors='none')
        plt.show()

        # check for auto correlation
        lag_plot(series_train)
        plt.show()
        autocorrelation_plot(series_train)
        plt.show()
        plot_acf(series_train.values, lags=max_lag)
        plt.show()

        data = series_train.values
        data = data.astype('float32')
        model = ARMA(data, order=best_order)
        # model_fit = model.fit(transparams=False)
        try:
            model_fit = model.fit(transparams=False)
            model_fit.plot_predict(plot_insample=True)
            plt.scatter(X, y, color='gray')
            plt.title('ARMA')
            plt.show()

            days_gap = (pd.to_datetime(pred_date) - df_arma_train['date'][0]).days
            forecast = model_fit.forecast(steps=days_gap)
            print('Prediction of shown value for', pred_date, '=')
            print(forecast[0][0])
        except ValueError:
            print('This data is not suitable for ARMA')
    else:
        print("Ad group does not exist")
initial = 1000
X = NormalRandomVariable(0, 1)
samples = X.draw(200)
Y = pd.Series(np.cumsum(samples) + initial, name='Y')
Y.plot()
plt.show()

# simulate the equity curve of a portfolio
start = '2015-01-01'
end = '2016-01-01'
prices = get_pricing('TSLA', fields=['price'], start_date=start, end_date=end)
returns = prices.pct_change()[1:]

cutoff = 0.01
_, p_value, skewness, kurtosis = stattools.jarque_bera(returns)
print(p_value, skewness, kurtosis)

plt.hist(returns.price, bins=20)
plt.ylabel('Occurrences')
plt.show()

sample_mean = np.mean(returns.price)
sample_std = np.std(returns.price)
x = np.linspace(-(sample_mean + 4 * sample_std), sample_mean + 4 * sample_std, 200)
sample_distribution = ((1 / np.sqrt(sample_std * sample_std * 2 * np.pi)) *
                       np.exp(-(x - sample_mean) * (x - sample_mean) /
                              (2 * sample_std * sample_std)))
plt.hist(returns.price, bins=20, normed=True)
plt.plot(x, sample_distribution)
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Summarize the Regression Results

    Parameters
    ----------
    yname : string, optional
        Default is `y`
    xname : list of strings, optional
        Default is `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. If not None, then this replaces the
        default title
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    #TODO: import where we need it (for now), add as cached attributes
    from statsmodels.stats.stattools import (jarque_bera, omni_normtest,
                                             durbin_watson)
    jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
    omni, omnipv = omni_normtest(self.wresid)

    eigvals = self.eigenvals
    condno = self.condition_number

    self.diagn = dict(jb=jb, jbpv=jbpv, skew=skew, kurtosis=kurtosis,
                      omni=omni, omnipv=omnipv, condno=condno,
                      mineigval=eigvals[0])

    top_left = [('Dep. Variable:', None),
                ('Model:', None),
                ('Method:', ['Least Squares']),
                ('Date:', None),
                ('Time:', None)]

    top_right = [('Pseudo R-squared:', ["%#8.4g" % self.prsquared]),
                 ('Bandwidth:', ["%#8.4g" % self.bandwidth]),
                 ('Sparsity:', ["%#8.4g" % self.sparsity]),
                 ('No. Observations:', None),
                 ('Df Residuals:', None),  #[self.df_resid]), #TODO: spelling
                 ('Df Model:', None)  #[self.df_model])
                 ]

    diagn_left = [('Omnibus:', ["%#6.3f" % omni]),
                  ('Prob(Omnibus):', ["%#6.3f" % omnipv]),
                  ('Skew:', ["%#6.3f" % skew]),
                  ('Kurtosis:', ["%#6.3f" % kurtosis])]

    diagn_right = [('Durbin-Watson:', ["%#8.3f" % durbin_watson(self.wresid)]),
                   ('Jarque-Bera (JB):', ["%#8.3f" % jb]),
                   ('Prob(JB):', ["%#8.3g" % jbpv]),
                   ('Cond. No.', ["%#8.3g" % condno])]

    if title is None:
        title = self.model.__class__.__name__ + ' ' + "Regression Results"

    #create summary table instance
    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=.05,
                          use_t=True)
    # smry.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
    #                      yname=yname, xname=xname, title="")

    #add warnings/notes, added to text format only
    etext = []
    if eigvals[-1] < 1e-10:
        wstr = "The smallest eigenvalue is %6.3g. This might indicate "
        wstr += "that there are\n"
        wstr += "strong multicollinearity problems or that the design "
        wstr += "matrix is singular."
        wstr = wstr % eigvals[-1]
        etext.append(wstr)
    elif condno > 1000:  #TODO: what is recommended
        wstr = "The condition number is large, %6.3g. This might "
        wstr += "indicate that there are\n"
        wstr += "strong multicollinearity or other numerical "
        wstr += "problems."
        wstr = wstr % condno
        etext.append(wstr)

    if etext:
        smry.add_extra_txt(etext)

    return smry
Here we introduce a test that is commonly used to check whether a distribution is normal.

$\text{H}_0$: the distribution is normal
$\text{H}_A$: $\text{H}_0$ does not hold

The assessment of normality is based on the following features of the distribution:

* Skewness: the left-right asymmetry of the distribution
* Kurtosis: how sharply peaked the distribution is

The test is included as part of a `statsmodels` subpackage. Let's try it on the `data_norm` used above.

jarque_bera(data_norm)

Return values:

1. JB test statistic
2. $p$-value of the JB statistic
3. estimated skewness (0 for a normal distribution)
4. estimated kurtosis (defined so that it equals 3 for a normal distribution)

In this example the $p$-value is high, so $\text{H}_0$ cannot be rejected.

---

Next, let's try `data_uniform`.

The $p$-value is very small, so $\text{H}_0$ can be rejected even at the 1% significance level.
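A minimal sketch of the two comparisons described above, assuming synthetic stand-ins for `data_norm` and `data_uniform` (which are created earlier in the original notebook); the explicit formula is shown only to illustrate how the statistic combines skewness and kurtosis.

import numpy as np
from scipy import stats
from statsmodels.stats.stattools import jarque_bera

rng = np.random.default_rng(0)
data_norm = rng.normal(size=10_000)      # stand-in for the data_norm used above
data_uniform = rng.uniform(size=10_000)  # stand-in for the data_uniform used above

print(jarque_bera(data_norm))     # large p-value -> H0 cannot be rejected
print(jarque_bera(data_uniform))  # tiny p-value  -> H0 rejected even at the 1% level

# The statistic itself is n/6 * (S^2 + (K - 3)^2 / 4),
# where S is the sample skewness and K the (non-excess) kurtosis.
n = len(data_uniform)
S = stats.skew(data_uniform)
K = stats.kurtosis(data_uniform, fisher=False)
print(n / 6.0 * (S ** 2 + (K - 3.0) ** 2 / 4.0))  # close to jarque_bera(data_uniform)[0]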
def numeric(self) -> pd.DataFrame:
    """
    Descriptive statistics for numeric data

    Returns
    -------
    DataFrame
        The statistics of the numeric columns
    """
    df: pd.DataFrame = self._data.loc[:, self._is_numeric]
    cols = df.columns
    _, k = df.shape
    std = df.std()
    count = df.count()
    mean = df.mean()
    mad = (df - mean).abs().mean()
    std_err = std.copy()
    std_err.loc[count > 0] /= count.loc[count > 0]
    if self._use_t:
        q = stats.t(count - 1).ppf(1.0 - self._alpha / 2)
    else:
        q = stats.norm.ppf(1.0 - self._alpha / 2)

    def _mode(ser):
        mode_res = stats.mode(ser.dropna())
        if mode_res[0].shape[0] > 0:
            return [float(val) for val in mode_res]
        return np.nan, np.nan

    mode_values = df.apply(_mode).T
    if mode_values.size > 0:
        if isinstance(mode_values, pd.DataFrame):
            # pandas 1.0 or later
            mode = np.asarray(mode_values[0], dtype=float)
            mode_counts = np.asarray(mode_values[1], dtype=np.int64)
        else:
            # pandas before 1.0 returns a Series of 2-elem list
            mode = []
            mode_counts = []
            for idx in mode_values.index:
                val = mode_values.loc[idx]
                mode.append(val[0])
                mode_counts.append(val[1])
            mode = np.atleast_1d(mode)
            mode_counts = np.atleast_1d(mode_counts)
    else:
        mode = mode_counts = np.empty(0)
    loc = count > 0
    mode_freq = np.full(mode.shape[0], np.nan)
    mode_freq[loc] = mode_counts[loc] / count.loc[loc]
    if df.shape[1] > 0:
        iqr = df.quantile(0.75) - df.quantile(0.25)
    else:
        iqr = mean

    jb = df.apply(lambda x: list(jarque_bera(x.dropna())),
                  result_type="expand").T
    nan_mean = mean.copy()
    nan_mean.loc[nan_mean == 0] = np.nan
    coef_var = std / nan_mean

    results = {
        "nobs": pd.Series(np.ones(k, dtype=np.int64) * df.shape[0], index=cols),
        "missing": df.shape[0] - count,
        "mean": mean,
        "std_err": std_err,
        "upper_ci": mean + q * std_err,
        "lower_ci": mean - q * std_err,
        "std": std,
        "iqr": iqr,
        "mad": mad,
        "coef_var": coef_var,
        "range": pd_ptp(df),
        "max": df.max(),
        "min": df.min(),
        "skew": jb[2],
        "kurtosis": jb[3],
        "iqr_normal": iqr / np.diff(stats.norm.ppf([0.25, 0.75])),
        "mad_normal": mad / np.sqrt(2 / np.pi),
        "jarque_bera": jb[0],
        "jarque_bera_pval": jb[1],
        "mode": pd.Series(mode, index=cols),
        "mode_freq": pd.Series(mode_freq, index=cols),
        "median": df.median(),
    }
    final = {k: v for k, v in results.items() if k in self._stats}
    results_df = pd.DataFrame(list(final.values()), columns=cols,
                              index=list(final.keys()))
    if "percentiles" not in self._stats:
        return results_df
    # Pandas before 1.0 cannot handle empty DF
    if df.shape[1] > 0:
        perc = df.quantile(self._percentiles / 100).astype(float)
    else:
        perc = pd.DataFrame(index=self._percentiles / 100, dtype=float)
    if np.all(np.floor(100 * perc.index) == (100 * perc.index)):
        perc.index = [f"{int(100 * idx)}%" for idx in perc.index]
    else:
        dupe = True
        scale = 100
        index = perc.index
        while dupe:
            scale *= 10
            idx = np.floor(scale * perc.index)
            if np.all(np.diff(idx) > 0):
                dupe = False
        index = np.floor(scale * index) / (scale / 100)
        fmt = f"0.{len(str(scale//100))-1}f"
        output = f"{{0:{fmt}}}%"
        perc.index = [output.format(val) for val in index]

    return self._reorder(pd.concat([results_df, perc], 0))
y = np.asarray(result_reg.resid**2)
x = np.asarray(logdiffdiffjuros)
resultadoBP = sm.OLS(y, x).fit()
fval = resultadoBP.fvalue
fpval = resultadoBP.f_pvalue
lm = 71 * resultadoBP.rsquared
lmtest = chi2.sf(lm, 70)
print("Test p-value:", fpval)
print("LM test:", lmtest)

## QQ plot of the residuals to check normality
qqplot = smgof.qqplot(result_reg.resid, line='q')
plt.show()

## Jarque-Bera test for normality of the residuals
jarque_bera = smtools.jarque_bera(result_reg.resid)
print("Test p-value: ", jarque_bera[1])
print("Estimated skewness: ", jarque_bera[2])
print("Estimated kurtosis: ", jarque_bera[3])

## Ljung-Box test for autocorrelation of the residuals
ljung = smstats.acorr_ljungbox(result_reg.resid)
y = 0
for x in ljung[1]:
    y += x
resul_lbox = y / len(ljung[1])
print("Test p-value:", resul_lbox)
# print(ljung[1])

## Cointegration test
coint_test = smtsa.coint(econvars.juros, econvars.txinadimp)
def exp_symbols_statistics(fout_path=os.path.join(
        DATA_DIR, 'exp_symbols_statistics.xlsx')):
    """
    statistics of experiment symbols
    output the results to xlsx
    """
    t0 = time()

    fin_path = os.path.join(SYMBOLS_PKL_DIR, 'TAIEX_2005_largest50cap_panel.pkl')
    # shape: (n_exp_period, n_stock, ('simple_roi', 'close_price'))
    panel = pd.read_pickle(fin_path)
    assert panel.major_axis.tolist() == EXP_SYMBOLS
    panel = panel.loc[date(2005, 1, 3):date(2014, 12, 31)]
    # the roi in the first experiment date is zero
    panel.loc[date(2005, 1, 3), :, 'simple_roi'] = 0.

    stat_indices = (
        # basic information
        'start_date', 'end_date', 'n_exp_period', 'n_period_up', 'n_period_down',
        # roi
        'cum_roi', 'daily_roi', 'daily_mean_roi', 'daily_std_roi',
        'daily_skew_roi', 'daily_kurt_roi',
        # roi/risk indices
        'sharpe', 'sortino_full', 'sortino_full_semi_std',
        'sortino_partial', 'sortino_partial_semi_std', 'max_abs_drawdown',
        # normal tests
        'JB', 'JB_pvalue',
        # uni-root tests
        'ADF_c', 'ADF_c_pvalue', 'ADF_ct', 'ADF_ct_pvalue',
        'ADF_ctt', 'ADF_ctt_pvalue', 'ADF_nc', 'ADF_nc_pvalue',
        'DFGLS_c', 'DFGLS_c_pvalue', 'DFGLS_ct', 'DFGLS_ct_pvalue',
        'PP_c', 'PP_c_pvalue', 'PP_ct', 'PP_ct_pvalue',
        'PP_nc', 'PP_nc_pvalue',
        'KPSS_c', 'KPSS_c_pvalue', 'KPSS_ct', 'KPSS_ct_pvalue',
        # performance
        'SPA_l_pvalue', 'SPA_c_pvalue', 'SPA_u_pvalue')

    stat_df = pd.DataFrame(np.zeros((len(stat_indices), len(EXP_SYMBOLS))),
                           index=stat_indices, columns=EXP_SYMBOLS)

    for rdx, symbol in enumerate(EXP_SYMBOLS):
        t1 = time()
        rois = panel[:, symbol, 'simple_roi']

        # basic
        stat_df.loc['start_date', symbol] = rois.index[0].strftime("%Y/%b/%d")
        stat_df.loc['end_date', symbol] = rois.index[-1].strftime("%Y/%b/%d")
        stat_df.loc['n_exp_period', symbol] = len(rois)
        stat_df.loc['n_period_up', symbol] = (rois > 0).sum()
        stat_df.loc['n_period_down', symbol] = (rois < 0).sum()

        # roi
        stat_df.loc['cum_roi', symbol] = (rois + 1.).prod() - 1
        stat_df.loc['daily_roi', symbol] = np.power(
            (rois + 1.).prod(), 1. / len(rois)) - 1
        stat_df.loc['daily_mean_roi', symbol] = rois.mean()
        stat_df.loc['daily_std_roi', symbol] = rois.std()
        stat_df.loc['daily_skew_roi', symbol] = rois.skew()
        stat_df.loc['daily_kurt_roi', symbol] = rois.kurt()  # excess

        # roi/risk indices
        stat_df.loc['sharpe', symbol] = sharpe(rois)
        (stat_df.loc['sortino_full', symbol],
         stat_df.loc['sortino_full_semi_std', symbol]) = sortino_full(rois)
        (stat_df.loc['sortino_partial', symbol],
         stat_df.loc['sortino_partial_semi_std', symbol]) = sortino_partial(rois)
        stat_df.loc['max_abs_drawdown', symbol] = maximum_drawdown(rois)

        # normal tests
        jb = jarque_bera(rois)
        stat_df.loc['JB', symbol] = jb[0]
        stat_df.loc['JB_pvalue', symbol] = jb[1]

        # uniroot tests
        adf_c = adfuller(rois, regression='c')
        stat_df.loc['ADF_c', symbol] = adf_c[0]
        stat_df.loc['ADF_c_pvalue', symbol] = adf_c[1]

        adf_ct = adfuller(rois, regression='ct')
        stat_df.loc['ADF_ct', symbol] = adf_ct[0]
        stat_df.loc['ADF_ct_pvalue', symbol] = adf_ct[1]

        adf_ctt = adfuller(rois, regression='ctt')
        stat_df.loc['ADF_ctt', symbol] = adf_ctt[0]
        stat_df.loc['ADF_ctt_pvalue', symbol] = adf_ctt[1]

        adf_nc = adfuller(rois, regression='nc')
        stat_df.loc['ADF_nc', symbol] = adf_nc[0]
        stat_df.loc['ADF_nc_pvalue', symbol] = adf_nc[1]

        dfgls_c_instance = DFGLS(rois, trend='c')
        dfgls_c, dfgls_c_pvalue = (dfgls_c_instance.stat, dfgls_c_instance.pvalue)
        stat_df.loc['DFGLS_c', symbol] = dfgls_c
        stat_df.loc['DFGLS_c_pvalue', symbol] = dfgls_c_pvalue

        dfgls_ct_instance = DFGLS(rois, trend='ct')
        dfgls_ct, dfgls_ct_pvalue = (dfgls_ct_instance.stat, dfgls_ct_instance.pvalue)
        stat_df.loc['DFGLS_ct', symbol] = dfgls_ct
        stat_df.loc['DFGLS_ct_pvalue', symbol] = dfgls_ct_pvalue

        pp_c_instance = PhillipsPerron(rois, trend='c')
        pp_c, pp_c_pvalue = (pp_c_instance.stat, pp_c_instance.pvalue)
        stat_df.loc['PP_c', symbol] = pp_c
        stat_df.loc['PP_c_pvalue', symbol] = pp_c_pvalue

        pp_ct_instance = PhillipsPerron(rois, trend='ct')
        pp_ct, pp_ct_pvalue = (pp_ct_instance.stat, pp_ct_instance.pvalue)
        stat_df.loc['PP_ct', symbol] = pp_ct
        stat_df.loc['PP_ct_pvalue', symbol] = pp_ct_pvalue

        pp_nc_instance = PhillipsPerron(rois, trend='nc')
        pp_nc, pp_nc_pvalue = (pp_nc_instance.stat, pp_nc_instance.pvalue)
        stat_df.loc['PP_nc', symbol] = pp_nc
        stat_df.loc['PP_nc_pvalue', symbol] = pp_nc_pvalue

        kpss_c_instance = KPSS(rois, trend='c')
        kpss_c, kpss_c_pvalue = (kpss_c_instance.stat, kpss_c_instance.pvalue)
        stat_df.loc['KPSS_c', symbol] = kpss_c
        stat_df.loc['KPSS_c_pvalue', symbol] = kpss_c_pvalue

        kpss_ct_instance = KPSS(rois, trend='ct')
        kpss_ct, kpss_ct_pvalue = (kpss_ct_instance.stat, kpss_ct_instance.pvalue)
        stat_df.loc['KPSS_ct', symbol] = kpss_ct
        stat_df.loc['KPSS_ct_pvalue', symbol] = kpss_ct_pvalue

        # performance
        spa = SPA(rois, np.zeros(len(rois)), reps=5000)
        spa.seed(np.random.randint(0, 2 ** 31 - 1))
        spa.compute()
        stat_df.loc['SPA_l_pvalue', symbol] = spa.pvalues[0]
        stat_df.loc['SPA_c_pvalue', symbol] = spa.pvalues[1]
        stat_df.loc['SPA_u_pvalue', symbol] = spa.pvalues[2]

        print("[{}/{}] {} roi statistics OK, {:.3f} secs".format(
            rdx + 1, len(EXP_SYMBOLS), symbol, time() - t1))

    # write to excel
    writer = pd.ExcelWriter(fout_path, engine='xlsxwriter')
    stat_df = stat_df.T
    stat_df.to_excel(writer, sheet_name='stats')

    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets['stats']

    # basic formats.
    # set header
    header_fmt = workbook.add_format()
    header_fmt.set_text_wrap()
    worksheet.set_row(0, 15, header_fmt)

    # set date
    date_fmt = workbook.add_format({'num_format': 'yy/mmm/dd'})
    date_fmt.set_align('right')
    worksheet.set_column('B:C', 12, date_fmt)

    # set percentage
    percent_fmt = workbook.add_format({'num_format': '0.00%'})
    worksheet.set_column('G:J', 8, percent_fmt)
    worksheet.set_column('M:Q', 8, percent_fmt)
    worksheet.set_column('T:T', 8, percent_fmt)
    worksheet.set_column('V:V', 8, percent_fmt)
    worksheet.set_column('X:X', 8, percent_fmt)
    worksheet.set_column('Z:Z', 8, percent_fmt)
    worksheet.set_column('AB:AB', 8, percent_fmt)
    worksheet.set_column('AD:AD', 8, percent_fmt)
    worksheet.set_column('AF:AF', 8, percent_fmt)
    worksheet.set_column('AH:AH', 8, percent_fmt)
    worksheet.set_column('AJ:AJ', 8, percent_fmt)
    worksheet.set_column('AL:AL', 8, percent_fmt)
    worksheet.set_column('AN:AN', 8, percent_fmt)
    worksheet.set_column('AP:AP', 8, percent_fmt)
    worksheet.set_column('AQ:AS', 8, percent_fmt)

    writer.save()
    print("all roi statistics OK, {:.3f} secs".format(time() - t0))
from statsmodels.stats.stattools import jarque_bera
import matplotlib.pyplot as plt

def get_close(symbol):
    today = date.today()
    start = (today.year - 1, today.month, today.day)
    quotes = quotes_historical_yahoo(symbol, start, today)
    quotes = np.array(quotes)
    return quotes.T[4]

spy = np.diff(np.log(get_close("SPY")))
dia = np.diff(np.log(get_close("DIA")))

print("Means comparison", stats.ttest_ind(spy, dia))
print("Kolmogorov smirnov test", stats.ks_2samp(spy, dia))
print("Jarque Bera test", jarque_bera(spy - dia)[1])

plt.title('Log returns of SPY and DIA')
plt.hist(spy, histtype="step", lw=1, label="SPY")
plt.hist(dia, histtype="step", lw=2, label="DIA")
plt.hist(spy - dia, histtype="step", lw=3, label="Delta")
plt.xlabel('Log returns')
plt.ylabel('Counts')
plt.grid()
plt.legend(loc='best')
plt.show()
def main(args):
    np.random.seed(9876789)
    df = pd.read_csv(args.train_data_path)
    feature = args.feature.split(",")
    s1 = ' + '.join(feature)
    s2 = args.label
    s = s2 + " ~ " + s1

    if args.type == "ols":
        results = smf.ols(s, data=df).fit(use_t=True)
    elif args.type == "gls":
        results = smf.gls(s, data=df).fit(use_t=True)
    elif args.type == "glsar":
        results = smf.glsar(s, data=df).fit(use_t=True)
    elif args.type == "wls":
        results = smf.wls(s, data=df).fit(use_t=True)
    else:
        print("Unknown model type!")
        exit(0)

    print("**********************************************************************************\n")
    alpha = args.alpha
    # print(results.summary())
    data_t = {
        "coef": results.params,
        "std err": results.bse,
        "t": results.tvalues,
        "P>|t|": results.pvalues,
        "[" + str(alpha / 2.0): results.conf_int(alpha)[0],
        str(1 - alpha / 2.0) + "]": results.conf_int(alpha)[1]
    }
    sdata_df = pd.DataFrame(data_t)
    print(sdata_df)
    sdata_df.to_csv(args.output2)

    from statsmodels.stats.stattools import (jarque_bera, omni_normtest,
                                             durbin_watson)
    jb, jbpv, skew, kurtosis = jarque_bera(results.wresid)
    omni, omnipv = omni_normtest(results.wresid)

    title = [
        "Model", "R-squared", "Adj. R-squared", "F-statistic",
        "Prob (F-statistic)", "Log-Likelihood", "AIC", "BIC", "Omnibus",
        "Prob(Omnibus)", "Skew", "Kurtosis", "Durbin-Watson",
        "Jarque-Bera (JB)", "Prob(JB)", "Cond. No."
    ]
    value = [
        results.model.__class__.__name__, results.rsquared,
        results.rsquared_adj, results.fvalue, results.f_pvalue, results.llf,
        results.aic, results.bic, omni, omnipv, skew, kurtosis,
        durbin_watson(results.wresid), jb, jbpv, results.diagn['condno']
    ]
    datadf = {"title": np.array(title), "value": np.array(value)}
    select_df = pd.DataFrame(datadf)
    print(select_df)
    select_df.to_csv(args.output1)

    # plot a 1D or a 3D figure
    predicted = results.predict(df)
    import matplotlib.pyplot as plt
    if len(feature) == 1:
        x = np.array(df[feature]).reshape(-1, 1)
        y = np.array(df[s2]).reshape(-1, 1)
        plt.figure(facecolor='white', figsize=(10, 5))
        plt.scatter(x, y, marker='x')
        plt.plot(x, predicted, c='r')
        title = 'The Linear Graph of One Dimension'
        # label the x and y axes
        plt.xlabel(feature[0])
        plt.ylabel(s2)
        plt.title(title)
        plt.grid()
        plt.savefig(args.data_png, format='png')
    elif len(feature) == 2:
        from mpl_toolkits.mplot3d import Axes3D
        ax1 = plt.axes(projection='3d')
        x = np.array(df[feature[0]]).reshape(-1, 1)
        y = np.array(df[feature[1]]).reshape(-1, 1)
        z = np.array(df[s2]).reshape(-1, 1)
        ax1.scatter3D(x, y, z, cmap='Blues')  # draw the scatter plot
        ax1.plot3D(x, y, predicted, 'gray')   # draw the 3D curve
        ax1.set_xlabel(feature[0])
        ax1.set_ylabel(feature[1])
        ax1.set_zlabel(s2)
        plt.savefig(args.data_png, format='png')
    else:
        print("The number of features is greater than 2; no plot!")
    return
def test_jarqueBera_jResult(self):
    data = np.random.normal(0, 100, 1000)
    j1, p1 = jarque_bera_test(data)
    j2, p2, skew, kurtosis = jarque_bera(data)
    assert pytest.approx(j2) == j1
        stat, p = stats.normaltest(data)
        if p > 0.05:
            scipy_normal_test_passed += 1

        # scipy kstest
        stat, p = stats.kstest(data, 'norm')
        if p > 0.05:
            scipy_kstest_passed += 1

        # statsmodels kstest
        statistic, p_value = sm.stats.diagnostic.kstest_normal(data)
        if p_value > 0.05:
            statsmodels_kstest_passed += 1

        # statsmodels jarque_bera
        jbstat, pvalue, skew, kurtosis = stattools.jarque_bera(data)
        if pvalue > 0.05:  # Same as scipy.normaltest!
            statsmodels_jarque_bera_passed += 1

    row = [
        scipy_normal_test_passed,
        scipy_kstest_passed,
        statsmodels_kstest_passed,
        statsmodels_jarque_bera_passed
    ]
    results.append(row)

results = np.array(results)  # Convert to NumPy for fancy indexing

plt.plot(sample_size_values, results[:, 0],
    qq_ax = plt.subplot2grid(layout, (2, 0))
    pp_ax = plt.subplot2grid(layout, (2, 1))

    y.plot(ax=ts_ax)
    ts_ax.set_title('Time Series Analysis Plots')
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.05)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.05)
    sm.qqplot(y, line='s', ax=qq_ax)
    qq_ax.set_title('QQ Plot')
    scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

    plt.tight_layout()
    return

# Select best lag order for AAPL returns
max_lag = 30
mdl = smt.ARMA(lrets, order=(1, 1)).fit(maxlag=max_lag, method='mle', trend='nc')
print(mdl.summary())

_ = tsplot(lrets, max_lag)
_ = tsplot(mdl.resid, max_lag)

from statsmodels.stats.stattools import jarque_bera

score, pvalue, _, _ = jarque_bera(mdl.resid)

if pvalue < 0.10:
    print('We have reason to suspect the residuals are not normally distributed.')
else:
    print('The residuals seem normally distributed.')
from statsmodels.stats import stattools

# In a notebook cell, run: stattools.jarque_bera?

jbstat, pvalue, skew, kurtosis = stattools.jarque_bera(heights)
print(pvalue)

if pvalue < 0.05:
    print("We reject the null hypothesis that the data is normal")
else:
    print("We cannot reject that the data came from a normal distribution")
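# The `heights` array above comes from earlier in the source material; a hypothetical
# stand-in such as the following makes the snippet runnable on its own (the sample
# size and parameters are illustrative assumptions, not from the original).
import numpy as np

rng = np.random.default_rng(42)
heights = rng.normal(loc=170, scale=8, size=500)  # heights in cm, illustrative only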
            model = ARMA(x, order=(p, q)).fit()
            if best_AIC == 0:
                best_AIC = model.aic
            if model.aic < best_AIC:
                best_order = [p, q]
                best_AIC = model.aic
        except:
            continue

arma_model = ARMA(x, order=(best_order[0], best_order[1])).fit()
print("The best order is " + str(best_order[0]) + ' ' + str(best_order[1]))
print(arma_model.summary())

#%%
# CHECK IF RESIDUAL IS WHITE NOISE
residuals = arma_model.resid
score, p_value, _, _ = jarque_bera(residuals)
lbvalue, pvalue, bpvalue, bppvalue = acorr_ljungbox(residuals, boxpierce=True)

if p_value < 0.05:
    print("We have reason to suspect that the residuals are not normally distributed")
else:
    print("The residuals seem normally distributed")

if pvalue < 0.05:
    print("We have reason to suspect that the residuals are autocorrelated")
else:
    print("The residuals seem like white noise")
def buyHoldPortfolio(symbols, startDate=date(2005, 1, 3), endDate=date(2013, 12, 31),
                     money=1e6, buyTransFee=0.001425, sellTransFee=0.004425,
                     save_latex=False, save_csv=True, debug=False):
    t = time.time()

    # read df
    dfs = []
    transDates = None
    for symbol in symbols:
        df = pd.read_pickle(os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol))
        tmp = df[startDate:endDate]
        startIdx = df.index.get_loc(tmp.index[0])
        endIdx = df.index.get_loc(tmp.index[-1])
        data = df[startIdx:endIdx + 1]['adjROI'] / 100.

        # check all data have the same trans. dates
        if transDates is None:
            transDates = data.index.values
        if not np.all(transDates == data.index.values):
            raise ValueError('symbol %s does not have the same trans. dates' % (symbol))
        dfs.append(data)

    # initialize
    n_rv = len(dfs)
    symbols.append('deposit')
    wealthProcess = pd.DataFrame(columns=symbols, index=transDates)

    # allocation
    for symbol in symbols[:-1]:
        wealthProcess[symbol][transDates[0]] = money / n_rv * (1 - buyTransFee)
    wealthProcess['deposit'] = 0

    # buy and hold
    for sdx, symbol in enumerate(symbols[:-1]):
        for tdx, transDate in enumerate(transDates[1:]):
            tm1 = transDates[tdx]
            roi = dfs[sdx][transDate]
            wealthProcess[symbol][transDate] = wealthProcess[symbol][tm1] * (1 + roi)

    # sell in the last period
    for symbol in symbols[:-1]:
        wealthProcess[symbol][-1] *= (1 - sellTransFee)

    wealth = wealthProcess.sum(axis=1)
    pROI = (wealth[-1] / 1e6 - 1) * 100
    prois = wealth.pct_change()
    prois[0] = 0

    ret = sss.jarque_bera(prois)
    JB = ret[1]

    ret2 = sts.adfuller(prois)
    ADF = ret2[1]

    resultDir = os.path.join(ExpResultsDir, "BuyandHoldPortfolio")
    if not os.path.exists(resultDir):
        os.makedirs(resultDir)

    fileName = os.path.join(resultDir, 'BuyandHold_result_2005.csv')
    statName = os.path.join(resultDir, 'BuyandHold_result_2005.txt')
    df_name = os.path.join(resultDir, "wealthProcess_n%s.pkl" % (len(dfs)))
    df2_name = os.path.join(resultDir, "wealthSum_n%s.pkl" % (len(dfs)))
    csv_name = os.path.join(resultDir, "wealthProcess_n%s.csv" % (len(dfs)))
    csv2_name = os.path.join(resultDir, "wealthSum_n%s.csv" % (len(dfs)))

    wealthProcess.to_csv(csv_name)
    wealth.to_csv(csv2_name)
    wealthProcess.to_pickle(df_name)
    wealth.to_pickle(df2_name)

    csvIO = StringIO()
    statIO = StringIO()
    if not os.path.exists(fileName):
        csvIO.write('n_rv, wealth, wROI(%), ROI(%%), stdev, skew, kurt,')
        csvIO.write('Sp(%%), StF(%%), StP(%%), downDevF, downDevP, JB, ADF\n')
        statIO.write('$n$ & $R_{C}$(\%) & $R_{A}$(\%) & $\mu$(\%) & $\sigma$(\%) & skew & kurt & $S_p$(\%) & $S_o$(\%) & JB & ADF \\\ \hline \n')

    sharpe = Performance.Sharpe(prois)
    sortinof, ddf = Performance.SortinoFull(prois)
    sortinop, ddp = Performance.SortinoPartial(prois)

    csvIO.write('%s,%s,%s,%s,%s,%s,%s,' % (
        n_rv, wealth[-1], pROI, prois.mean() * 100, prois.std() * 100,
        spstats.skew(prois), spstats.kurtosis(prois)))
    csvIO.write('%s,%s,%s,%s,%s,%s,%s\n' % (
        sharpe * 100, sortinof * 100, sortinop * 100,
        ddf * 100, ddp * 100, JB, ADF))
    statIO.write('%2d & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n' % (
        n_rv, pROI, (np.power(wealth[-1] / 1e6, 1. / 9) - 1) * 100,
        prois.mean() * 100, prois.std() * 100, spstats.skew(prois),
        spstats.kurtosis(prois), sharpe * 100, sortinof * 100, JB, ADF))

    with open(fileName, 'ab') as fout:
        fout.write(csvIO.getvalue())
    csvIO.close()

    with open(statName, 'ab') as fout:
        fout.write(statIO.getvalue())
    statIO.close()

    print "buyhold portfolio %s %s_%s pROI:%.3f%%, %.3f secs" % (
        startDate, endDate, n_rv, pROI, time.time() - t)
from matplotlib.finance import quotes_historical_yahoo_ohlc
from datetime import date
import numpy as np
from scipy import stats
from statsmodels.stats.stattools import jarque_bera
import matplotlib.pyplot as plt

def get_close(symbol):
    today = date.today()
    start = (today.year - 1, today.month, today.day)
    quotes = quotes_historical_yahoo_ohlc(symbol, start, today)
    quotes = np.array(quotes)
    return quotes.T[4]

# (2) Compute the log returns of DIA and SPY: take the natural log of the closing
#     prices, then difference consecutive values.
spy = np.diff(np.log(get_close("SPY")))
dia = np.diff(np.log(get_close("DIA")))

# (3) The means test checks whether two samples could have the same mean. It returns
#     two values; the second is the p-value, between 0 and 1.
print ("Means comparison", stats.ttest_ind(spy, dia))

# (4) The Kolmogorov-Smirnov test tells us how likely it is that the two samples
#     come from the same distribution.
print ("Kolmogorov smirnov test", stats.ks_2samp(spy, dia))

# (5) Apply the Jarque-Bera normality test to the difference of the two log-return series.
print ("Jarque Bera test", jarque_bera(spy - dia)[1])

# (6) Plot histograms of the log returns and of their difference with Matplotlib.
plt.hist(spy, histtype="step", lw=1, label="SPY")
plt.hist(dia, histtype="step", lw=2, label="DIA")
plt.hist(spy - dia, histtype="step", lw=3, label="Delta")
plt.legend()
plt.show()
def _safe_jarque_bera(c):
    a = np.asarray(c)
    if a.shape[0] < 2:
        return (np.nan,) * 4
    return jarque_bera(a)
# Remember that each test is written a little differently across different programming
# languages. You might not know whether it's the null or alternative hypothesis that
# the tested data comes from a normal distribution. It is recommended that you use the
# `?` notation plus online searching to find documentation on the test; plus it is
# often a good idea to calibrate a test by checking it on simulated data and making
# sure it gives the right answer. Let's do that now.

# In[7]:

from statsmodels.stats.stattools import jarque_bera

N = 1000
M = 1000

pvalues = np.ndarray((N))

for i in range(N):
    # Draw M samples from a normal distribution
    X = np.random.normal(0, 1, M)
    _, pvalue, _, _ = jarque_bera(X)
    pvalues[i] = pvalue

# count number of p-values below our default 0.05 cutoff
num_significant = len(pvalues[pvalues < 0.05])

print float(num_significant) / N

# Great, if properly calibrated we should expect to be wrong $5\%$ of the time at a
# 0.05 significance level, and this is pretty close. This means that the test is
# working as we expect.

# In[8]:

_, pvalue, _, _ = jarque_bera(returns)
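# A complementary calibration check (an addition, using the same idea as the loop
# above): on clearly non-normal data, for example exponential draws, the test should
# reject normality almost every time. N and M are re-declared so the sketch runs on
# its own; the parameters are illustrative assumptions.
import numpy as np
from statsmodels.stats.stattools import jarque_bera

N = 1000
M = 1000
pvalues_exp = np.ndarray((N))
for i in range(N):
    X = np.random.exponential(1.0, M)
    _, pvalue, _, _ = jarque_bera(X)
    pvalues_exp[i] = pvalue

# the rejection rate should be close to 1.0
print(float(len(pvalues_exp[pvalues_exp < 0.05])) / N)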