def data_description(index, start, end):
    """Print descriptive statistics for a return series and plot its distribution.

    The series is obtained from ``download_data.get_returns(index, start, end)``.
    The function prints measures of central tendency, dispersion, skewness and
    kurtosis, then the result of ``st.kstest(returns, 'norm')``, and finally
    shows a 100-bin distribution plot labelled 'Empirical'.

    Args:
        index: Identifier forwarded to ``download_data.get_returns``.
        start: Start bound forwarded to ``download_data.get_returns``.
        end: End bound forwarded to ``download_data.get_returns``.

    Returns:
        None. All output is printed or drawn.
    """
    returns = download_data.get_returns(index, start, end)

    # Measures of central tendency
    mean = np.mean(returns)
    print('个数:', len(returns))
    print('平均值:', mean)
    print('中位数:', np.median(returns))
    # The upper quartile is the 75th percentile and the lower quartile the
    # 25th; the two labels were previously attached to the wrong values.
    lower_quartile = sts.quantile(returns, p=0.25)
    upper_quartile = sts.quantile(returns, p=0.75)
    print('上四分位数', upper_quartile)
    print('下四分位数', lower_quartile)

    # Measures of dispersion
    largest = np.max(returns)
    smallest = np.min(returns)
    std = np.std(returns)
    print('最大值:', largest)
    print('最小值:', smallest)
    print('极差:', largest - smallest)
    print('四分位差', upper_quartile - lower_quartile)
    print('标准差:', std)
    print('方差:', np.var(returns))
    print('离散系数:', std / mean)

    # Measures of skewness and kurtosis
    print('偏度:', sts.skewness(returns))
    print('峰度:', sts.kurtosis(returns))

    # NOTE(review): assuming `st` is scipy.stats, 'norm' with no extra args is
    # the standard normal (mean 0, std 1), not a normal fitted to `returns` —
    # confirm this is the intended null hypothesis.
    print(st.kstest(returns, 'norm'))

    sns.distplot(returns, bins=100, label='Empirical')
    # NOTE(review): `sns.plt` appears to exist only on older seaborn releases —
    # confirm the pinned version before upgrading.
    sns.plt.legend()
    sns.plt.title('Empirical')
    sns.plt.show()
def draw_lognorm(index, start, end, sample_size=10000000):
    """Plot the empirical return distribution against a fitted normal curve.

    The series is obtained from ``download_data.get_returns(index, start, end)``.
    A normal distribution with the series' mean and standard deviation is
    sampled ``sample_size`` times, and the density estimate of that sample is
    drawn (without histogram) over a 100-bin plot of the actual returns.

    Args:
        index: Identifier forwarded to ``download_data.get_returns``.
        start: Start bound forwarded to ``download_data.get_returns``.
        end: End bound forwarded to ``download_data.get_returns``.
        sample_size: Number of random draws used to trace the fitted curve.
            Defaults to the previously hard-coded 10,000,000; a smaller value
            makes the plot much cheaper to produce.

    Returns:
        None. The figure is shown via ``sns.plt.show()``.
    """
    returns = download_data.get_returns(index, start, end)

    # NOTE(review): the curve is labelled 'log_norm' although a normal
    # distribution is fitted — presumably `returns` are log returns; confirm.
    fit_data = st.norm(returns.mean(), returns.std()).rvs(sample_size)
    sns.distplot(fit_data, label='log_norm', hist=False)

    sns.distplot(returns, bins=100, label='Empirical')
    # NOTE(review): `sns.plt` appears to exist only on older seaborn releases —
    # confirm the pinned version before upgrading.
    sns.plt.legend()
    sns.plt.title('LogNorm')
    sns.plt.show()
def draw_mixture(index, start, end, sample_size=10000000):
    """Fit Gaussian mixtures to a return series and plot the best ones.

    Mixtures with 2 to 5 components are fitted to the series returned by
    ``download_data.get_returns(index, start, end)``. The component counts
    minimising AIC and BIC are printed (AIC first). The selected model(s) are
    then sampled and the density estimate of each sample is drawn over a
    100-bin plot of the actual returns. When AIC and BIC agree a single curve
    labelled 'Gaussian' is drawn; otherwise two curves labelled 'Gaussian-AIC'
    and 'Gaussian-BIC' are drawn.

    Args:
        index: Identifier forwarded to ``download_data.get_returns``.
        start: Start bound forwarded to ``download_data.get_returns``.
        end: End bound forwarded to ``download_data.get_returns``.
        sample_size: Number of points sampled from each selected mixture to
            trace its curve. Defaults to the previously hard-coded 10,000,000.

    Returns:
        None. The figure is shown via ``sns.plt.show()``.
    """
    returns = download_data.get_returns(index, start, end)
    # The mixture model is fitted on a 2-D (n_samples, 1) array.
    returns = returns.reshape((len(returns), 1))

    # Keep every fitted candidate so that the model that gets plotted is the
    # very one whose AIC/BIC was scored. Refitting after selection (as was done
    # before, and with a looser tolerance in one branch) can converge to a
    # different solution because the initialisation is not seeded.
    candidates = {}
    for n_components in range(2, 6):
        model = GaussianMixture(
            n_components=n_components, tol=1e-6, covariance_type='full'
        )
        candidates[n_components] = model.fit(returns)

    # min() returns the first minimum in insertion order, i.e. the smallest
    # component count on ties.
    k1 = min(candidates, key=lambda k: candidates[k].aic(returns))
    k2 = min(candidates, key=lambda k: candidates[k].bic(returns))
    print(k1)
    print(k2)

    if k1 == k2:
        curves = [('Gaussian', candidates[k1])]
    else:
        curves = [
            ('Gaussian-AIC', candidates[k1]),
            ('Gaussian-BIC', candidates[k2]),
        ]

    for label, fitted in curves:
        # sample() returns (points, component_labels); only the points are
        # plotted, flattened to 1-D.
        simulated = fitted.sample(sample_size)[0].ravel()
        sns.distplot(simulated, label=label, hist=False)

    sns.distplot(returns, bins=100, label='Empirical')
    # NOTE(review): `sns.plt` appears to exist only on older seaborn releases —
    # confirm the pinned version before upgrading.
    sns.plt.legend()
    sns.plt.title('GaussianMixture')
    sns.plt.show()
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.sandbox.stats.runs import runstest_1samp
from statsmodels.tsa.stattools import acf

import download_data


def get_log_return(file, length):
    """Read a price CSV and return the most recent log returns.

    The CSV is read with GBK encoding and must contain the columns '收盘价'
    and '前收盘价'; each return is the natural log of their ratio.

    Args:
        file: Path (or file-like object) passed to ``pandas.read_csv``.
        length: Number of trailing observations to keep. Note that ``0``
            keeps the whole series, because ``[-0:]`` slices from the start.

    Returns:
        A NumPy array holding the last ``length`` log returns.
    """
    data = pd.read_csv(file, encoding='gbk')
    returns = np.array(data[u'收盘价'] / data[u'前收盘价'])[-length:]
    log_returns = np.log(returns)
    return log_returns


# Module-level data loading: these statements run on import.
SH = download_data.get_returns('sh000001', '20100101', '20170413')
SZ = download_data.get_returns('sz399001', '20100101', '20170413')

# The CSV-based series are trimmed to the same length as SZ.
length1 = len(SZ)
HSI = get_log_return('HSI.CSV', length1)
DJONES = get_log_return('DJONES.CSV', length1)
SP500 = get_log_return('SP500.CSV', length1)
NASDAQ = get_log_return('NASDAQ.CSV', length1)
N225 = get_log_return('N225.CSV', length1)


def autocor_test(data):
    """Run autocorrelation diagnostics on a series and draw its ACF plot.

    The number of lags is the integer part of the square root of the series
    length. The autocorrelation function is computed with Q statistics and a
    0.05 alpha, a one-sample runs test is performed with the mean as cutoff,
    and the ACF is plotted with the same lags and alpha.

    Args:
        data: Sequence of observations supporting ``len()``.

    Returns:
        A tuple ``(acf_result, runstest_result)`` holding the outputs of
        ``acf`` and ``runstest_1samp``. These were previously computed and
        then discarded.
    """
    lag = int(len(data) ** 0.5)
    acf_result = acf(data, nlags=lag, qstat=True, alpha=0.05)
    runstest_result = runstest_1samp(data, cutoff='mean')
    # plot_acf was called without ever being imported, which raised NameError;
    # it is now imported at the top of the file.
    plot_acf(data, lags=lag, alpha=0.05)
    return acf_result, runstest_result