コード例 #1
0
def data_description(index, start, end):
    """Print descriptive statistics for a return series and plot its histogram.

    The series is obtained from ``download_data.get_returns(index, start, end)``;
    the three arguments are passed through unchanged.  The function prints
    measures of central tendency, dispersion and shape, the result of a
    Kolmogorov-Smirnov normality test, and finally shows a 100-bin
    distribution plot of the returns.  Nothing is returned.
    """
    # Imported locally: newer seaborn releases no longer expose pyplot as
    # ``seaborn.plt``, so the pyplot calls below go through matplotlib directly.
    import matplotlib.pyplot as plt

    returns = download_data.get_returns(index, start, end)

    # Compute every statistic that is reported more than once a single time.
    mean = np.mean(returns)
    std = np.std(returns)
    lowest = np.min(returns)
    highest = np.max(returns)
    lower_quartile = sts.quantile(returns, p=0.25)
    upper_quartile = sts.quantile(returns, p=0.75)

    # Measures of central tendency
    print('个数:', len(returns))
    print('平均值:', mean)
    print('中位数:', np.median(returns))
    # The upper quartile is the 75th percentile and the lower quartile the
    # 25th; the two labels used to be attached to the opposite values.
    print('上四分位数', upper_quartile)
    print('下四分位数', lower_quartile)
    # Measures of dispersion
    print('最大值:', highest)
    print('最小值:', lowest)
    print('极差:', highest - lowest)
    print('四分位差', upper_quartile - lower_quartile)
    print('标准差:', std)
    print('方差:', np.var(returns))
    print('离散系数:', std / mean)
    # Measures of skewness and kurtosis
    print('偏度:', sts.skewness(returns))
    print('峰度:', sts.kurtosis(returns))
    # Test against a normal distribution with the sample's own location and
    # scale.  Without ``args`` the reference is the standard normal N(0, 1),
    # which only makes sense for data that is already standardised.
    print(st.kstest(returns, 'norm', args=(mean, std)))

    sns.distplot(returns, bins=100, label='Empirical')
    plt.legend()
    plt.title('Empirical')
    plt.show()
コード例 #2
0
def draw_lognorm(index, start, end):
    """Overlay a fitted normal density on the empirical return distribution.

    The series is obtained from ``download_data.get_returns(index, start, end)``.
    A normal distribution is parameterised with the sample mean and standard
    deviation, 10,000,000 random draws are taken from it, and their density
    estimate is plotted (without histogram) next to a 100-bin distribution
    plot of the observed returns.  Nothing is returned.

    NOTE(review): the curve is labelled 'log_norm' although the fitted
    distribution is ``st.norm``; presumably the returns are log returns, so a
    normal fit corresponds to log-normal prices -- confirm with the data source.
    """
    # Imported locally: newer seaborn releases no longer expose pyplot as
    # ``seaborn.plt``, so the pyplot calls below go through matplotlib directly.
    import matplotlib.pyplot as plt

    returns = download_data.get_returns(index, start, end)

    # Simulate from the fitted normal and let distplot draw only its KDE.
    fit_data = st.norm(returns.mean(), returns.std()).rvs(10000000)
    sns.distplot(fit_data, label='log_norm', hist=None)

    sns.distplot(returns, bins=100, label='Empirical')
    plt.legend()
    plt.title('LogNorm')
    plt.show()
コード例 #3
0
def draw_mixture(index, start, end):
    """Fit Gaussian mixtures to a return series and plot the selected model(s).

    The series is obtained from ``download_data.get_returns(index, start, end)``.
    Mixtures with 2 to 5 components are fitted; the component counts with the
    lowest AIC and the lowest BIC are printed (AIC first).  If both criteria
    agree, one density curve labelled 'Gaussian' is drawn, otherwise one curve
    per criterion ('Gaussian-AIC' and 'Gaussian-BIC').  Each curve is the
    density estimate of 10,000,000 draws from the corresponding mixture and is
    shown next to a 100-bin distribution plot of the observed returns.
    Nothing is returned.
    """
    # Imported locally: newer seaborn releases no longer expose pyplot as
    # ``seaborn.plt``, so the pyplot calls below go through matplotlib directly.
    import matplotlib.pyplot as plt

    sample_size = 10000000
    component_counts = tuple(range(2, 6))

    returns = download_data.get_returns(index, start, end)
    # GaussianMixture is fitted on a column vector: one row per observation.
    returns = returns.reshape((len(returns), 1))

    # Keep every fitted model so the one that is plotted is exactly the one
    # whose AIC/BIC was evaluated.  Refitting afterwards (previously done, and
    # with a looser tolerance in one branch) can converge to a different
    # solution than the one that won the comparison.
    fitted = {}
    for k in component_counts:
        fitted[k] = GaussianMixture(n_components=k,
                                    tol=1e-6,
                                    covariance_type='full').fit(returns)
    aic = {k: model.aic(returns) for k, model in fitted.items()}
    bic = {k: model.bic(returns) for k, model in fitted.items()}

    # min() returns the first minimum, i.e. the smallest component count on ties.
    k1 = min(component_counts, key=aic.get)
    k2 = min(component_counts, key=bic.get)
    print(k1)
    print(k2)

    if k1 == k2:
        curves = [('Gaussian', fitted[k1])]
    else:
        curves = [('Gaussian-AIC', fitted[k1]), ('Gaussian-BIC', fitted[k2])]

    for label, model in curves:
        # sample() returns (draws, component_labels); only the draws are plotted.
        draws = model.sample(sample_size)[0].reshape(1, sample_size)
        sns.distplot(draws, label=label, hist=None)

    sns.distplot(returns, bins=100, label='Empirical')
    plt.legend()
    plt.title('GaussianMixture')
    plt.show()
コード例 #4
0
import seaborn as sns
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf
from statsmodels.sandbox.stats.runs import runstest_1samp
import download_data


def get_log_return(file, length):
    """Return the last ``length`` log returns computed from a price CSV.

    Parameters:
        file: path (or anything ``pandas.read_csv`` accepts) of a GBK-encoded
            CSV that contains the columns '收盘价' (close) and '前收盘价'
            (previous close).
        length: number of most recent rows to keep.  If the file has fewer
            rows, all of them are returned; ``0`` yields an empty array.

    Returns:
        A 1-D ``numpy`` array with ``log(close / previous close)`` for the
        selected rows, in file order.
    """
    data = pd.read_csv(file, encoding='gbk')
    ratios = np.array(data[u'收盘价'] / data[u'前收盘价'])
    # ``ratios[-0:]`` is the *whole* array because -0 == 0, so a request for
    # zero rows has to be handled explicitly.
    tail = ratios[-length:] if length else ratios[:0]
    return np.log(tail)


# Return series for the index codes 'sh000001' and 'sz399001' over the
# period 20100101-20170413, fetched in that order.
SH, SZ = (
    download_data.get_returns(code, '20100101', '20170413')
    for code in ('sh000001', 'sz399001')
)
# Every CSV-based series below keeps at most as many observations as SZ has.
length1 = len(SZ)

# Log returns read from the local CSV files, loaded in the listed order.
HSI, DJONES, SP500, NASDAQ, N225 = (
    get_log_return(csv_name, length1)
    for csv_name in ('HSI.CSV', 'DJONES.CSV', 'SP500.CSV', 'NASDAQ.CSV',
                     'N225.CSV')
)


def autocor_test(data):
    """Run autocorrelation diagnostics on ``data``.

    The visible part of the function computes the sample autocorrelation
    function, a one-sample runs test and draws an ACF plot, all using
    ``int(sqrt(len(data)))`` as the maximum lag.

    NOTE(review): in the lines shown here ``acf_result`` and
    ``runstest_result`` are assigned but neither printed nor returned; the
    definition may continue beyond this excerpt -- confirm.
    """
    # Maximum lag: integer part of the square root of the sample size.
    lag = int((len(data))**0.5)
    # qstat=True / alpha=0.05 presumably make statsmodels also return
    # Ljung-Box Q statistics, p-values and 95% confidence intervals --
    # TODO confirm against the installed statsmodels version.
    acf_result = acf(data, nlags=lag, qstat=True, alpha=0.05)
    # Runs test with the sample mean as the cutoff between the two categories.
    runstest_result = runstest_1samp(data, cutoff='mean')
    # NOTE(review): plot_acf is not among the imports visible in this file
    # excerpt; verify that it is imported elsewhere.
    plot_acf(data, lags=lag, alpha=0.05)