def main() -> None:
    """Run the right-power experiment and draw the KCIPT / SDCIT comparison figures.

    Steps, in order:
      1. Assert that the KCIPT result CSVs for B=5000 and B=20000 exist.
      2. Call ``experiment(obj_filename)`` and then unpickle eight values from
         ``obj_filename`` (test statistics, null samples, ``desired_B`` and a
         bootstrap sample).
      3. Print ``desired_B`` and the skewness of the SDCIT and KCIPT null samples.
      4. Load the KCIPT / SDCIT result CSVs and fit Pearson type III (and, for
         B=20000, gamma) densities to the null samples.
      5. Save three PDF figures into ``SDCIT_FIGURE_DIR``:
         ``kcipt_{desired_B}_ps.pdf``, ``kcipt_5000_ps.pdf`` and
         ``kcipt_20000_ps.pdf``.

    Raises:
        AssertionError: if a required KCIPT CSV is missing, or if the filtered
            SDCIT result frame does not contain exactly 300 rows.
    """
    # These two files are read unconditionally further down.
    assert exists(SDCIT_RESULT_DIR + '/kcipt_chaotic_5000.csv'), 'run_SDCIT first'
    assert exists(SDCIT_RESULT_DIR + '/kcipt_chaotic_20000.csv'), 'run_SDCIT first'

    # Function-scope import of the shared plotting colours.
    from experiments.draw_figures import color_palettes, method_color_codes

    obj_filename = SDCIT_RESULT_DIR + '/right_power.pickle'
    # NOTE(review): experiment() is defined elsewhere; presumably it writes
    # obj_filename, which is read back just below -- confirm.
    experiment(obj_filename)

    # NOTE(review): the reason for this pause is not visible here; presumably it
    # gives the file written above time to become available -- confirm.
    time.sleep(3)

    # NOTE(review): pickle.load can execute arbitrary code; this is only safe if
    # the file is produced locally and trusted.
    with open(obj_filename, 'rb') as f:  # pickle data must be opened in binary mode
        sdcit_mmd, sdcit_null, mmds100, outer_null100, desired_B, mmds_B, outer_null_B, distr_boot = pickle.load(f)

    print(desired_B)
    print('SKEW SDCIT NULL: {}'.format(scipy.stats.skew(sdcit_null)))
    print('SKEW KCIPT NULL: {}'.format(scipy.stats.skew(outer_null_B)))

    # Column names for the result CSVs, which are read with explicit names
    # (KCIPT rows carry one extra column, 'B').
    names_kcipt_chaotic = ['independent', 'gamma', 'trial', 'N', 'statistic', 'pvalue', 'B']
    names_sdcit_chaotic = ['independent', 'gamma', 'trial', 'N', 'statistic', 'pvalue']

    # Note: only the 5000 and 20000 files are asserted to exist above; the
    # desired_B file is not checked before being read.
    df_kcipt_desired_B = pd.read_csv(SDCIT_RESULT_DIR + '/kcipt_chaotic_{}.csv'.format(desired_B), names=names_kcipt_chaotic, )
    df_kcipt_5000 = pd.read_csv(SDCIT_RESULT_DIR + '/kcipt_chaotic_5000.csv', names=names_kcipt_chaotic, )
    df_kcipt_20000 = pd.read_csv(SDCIT_RESULT_DIR + '/kcipt_chaotic_20000.csv', names=names_kcipt_chaotic, )
    df_sdcit = pd.read_csv(SDCIT_RESULT_DIR + '/sdcit_chaotic.csv', names=names_sdcit_chaotic, )
    # Keep only the SDCIT rows with N == 400, independent == 1 and gamma == 0.0;
    # exactly 300 such rows are expected.
    df_sdcit = df_sdcit[df_sdcit['N'] == 400]
    df_sdcit = df_sdcit[df_sdcit['independent'] == 1]
    df_sdcit = df_sdcit[df_sdcit['gamma'] == 0.0]
    assert len(df_sdcit) == 300
    # Fitted Pearson type III density of the SDCIT null, evaluated on a grid of
    # 1000 points spanning 1.3x the observed min/max of the null sample.
    xs_sdcit = np.linspace(1.3 * sdcit_null.min(), 1.3 * sdcit_null.max(), 1000)
    ys_sdcit_pearson3 = pearson3.pdf(xs_sdcit, *pearson3.fit(sdcit_null))

    # Same construction for the KCIPT null obtained with B = desired_B.
    xs_kcipt = np.linspace(1.3 * outer_null_B.min(), 1.3 * outer_null_B.max(), 1000)
    ys_kcipt_pearson3 = pearson3.pdf(xs_kcipt, *pearson3.fit(outer_null_B))

    # No null sample for B=20000 is loaded; it is approximated by dividing the
    # desired_B null by sqrt(20000 / desired_B) and fitting a gamma density.
    factor_20000 = np.sqrt(20000 / desired_B)
    ys_kcipt_20000_gamma = gamma.pdf(xs_kcipt, *gamma.fit(outer_null_B / factor_20000))

    # Global plot style; usetex=True makes matplotlib render text through LaTeX
    # with the cmbright package.
    sns.set(style='white', font_scale=1.2)
    paper_rc = {'lines.linewidth': 0.8, 'lines.markersize': 2, 'patch.linewidth': 1}
    sns.set_context("paper", rc=paper_rc)
    plt.rc('text', usetex=True)
    plt.rc('text.latex', preamble=r'\usepackage{cmbright}')

    # ---- Figure 1: 2x2 grid, saved as kcipt_{desired_B}_ps.pdf ----
    if True:
        fig = plt.figure(figsize=[5, 3.5])
        # Top-left: fitted null densities, the SDCIT statistic as a vertical
        # line, and a histogram of distr_boot labelled 'KCIPT TS'.
        fig.add_subplot(2, 2, 1, adjustable='box')

        plt.plot(xs_sdcit, ys_sdcit_pearson3, label='SDCIT null', lw=1.5, color=color_palettes[method_color_codes['SDCIT']])
        plt.plot([sdcit_mmd, sdcit_mmd], [0, 1000], label='SDCIT TS', color=color_palettes[method_color_codes['SDCIT']])
        plt.plot(xs_kcipt, ys_kcipt_pearson3, label='KCIPT null', lw=1.5, color=color_palettes[method_color_codes['KCIPT']])
        sns.distplot(distr_boot, hist=True, kde=False, hist_kws={'histtype': 'stepfilled'}, norm_hist=True, label='KCIPT TS', color=color_palettes[method_color_codes['KCIPT']])
        plt.gca().set_xlim([-0.0003, 0.0005])
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.gca().set_ylabel('density')
        # Hide the y tick labels.
        plt.setp(plt.gca(), 'yticklabels', [])
        plt.legend(loc=1)
        # Top-right: p-values of each distr_boot value against the KCIPT null
        # (histogram) and the single SDCIT p-value (vertical line).
        fig.add_subplot(2, 2, 2, adjustable='box')

        # p_value_of(statistic, null_sample) is a helper defined elsewhere.
        pvals_B = [p_value_of(t, outer_null_B) for t in distr_boot]
        pval_sdcit = p_value_of(sdcit_mmd, sdcit_null)

        sns.distplot(pvals_B, bins=20, hist=True, kde=False, hist_kws={'histtype': 'stepfilled'}, norm_hist=True, color=color_palettes[method_color_codes['KCIPT']], label='KCIPT p-values')
        plt.plot([pval_sdcit, pval_sdcit], [0, 1], label='SDCIT p-value', color=color_palettes[method_color_codes['SDCIT']])
        plt.gca().set_ylim([0, 2.2])
        plt.gcf().subplots_adjust(wspace=0.3)
        plt.legend(loc=2)
        sns.despine()

        # Bottom-left: histograms of the 'statistic' column from the SDCIT and
        # KCIPT (B = desired_B) result files.
        fig.add_subplot(2, 2, 3, adjustable='box')
        sns.distplot(df_sdcit['statistic'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['SDCIT']], label='SDCIT TS')
        sns.distplot(df_kcipt_desired_B['statistic'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['KCIPT']], label='KCIPT TS')
        plt.legend()
        plt.gca().set_xlim([-0.0003, 0.0005])
        plt.gca().set_xlabel('MMD')
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.gca().set_ylabel('density')
        plt.setp(plt.gca(), 'yticklabels', [])

        # Bottom-right: histograms of the 'pvalue' column from the same files.
        fig.add_subplot(2, 2, 4, adjustable='box')

        sns.distplot(df_sdcit['pvalue'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['SDCIT']], norm_hist=True, label='SDCIT p-values')
        sns.distplot(df_kcipt_desired_B['pvalue'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['KCIPT']], norm_hist=True, label='KCIPT p-values')
        plt.gca().set_xlabel('p-value')
        plt.gcf().subplots_adjust(wspace=0.3, hspace=0.3)
        plt.gca().set_ylim([0, 2.2])
        plt.legend(loc=0)
        sns.despine()
        plt.savefig(SDCIT_FIGURE_DIR + '/kcipt_{}_ps.pdf'.format(desired_B), transparent=True, bbox_inches='tight', pad_inches=0.02)
        plt.close()

    # ---- Figure 2: 1x2 grid for KCIPT with B=5000, saved as kcipt_5000_ps.pdf ----
    if True:
        # Same style settings as above, applied again for this figure.
        sns.set(style='white', font_scale=1.2)
        paper_rc = {'lines.linewidth': 0.8, 'lines.markersize': 2, 'patch.linewidth': 1}
        sns.set_context("paper", rc=paper_rc)
        plt.rc('text', usetex=True)
        plt.rc('text.latex', preamble=r'\usepackage{cmbright}')
        fig = plt.figure(figsize=[5, 1.6])
        # Left: histogram of the 'statistic' column.
        fig.add_subplot(1, 2, 1, adjustable='box')
        sns.distplot(df_kcipt_5000['statistic'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['KCIPT']], label='TS')
        plt.legend()
        plt.gca().set_xlabel('MMD')
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.gca().set_ylabel('density')
        plt.gca().set_xlim([-0.0002, 0.0003])
        plt.setp(plt.gca(), 'yticklabels', [])
        # Right: normalised histogram of the 'pvalue' column.
        fig.add_subplot(1, 2, 2, adjustable='box')
        sns.distplot(df_kcipt_5000['pvalue'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['KCIPT']], norm_hist=True, label='p-value')
        plt.gca().set_xlabel('p-value')
        plt.gcf().subplots_adjust(wspace=0.3, hspace=0.3)
        plt.legend(loc=0)
        sns.despine()
        plt.savefig(SDCIT_FIGURE_DIR + '/kcipt_5000_ps.pdf', transparent=True, bbox_inches='tight', pad_inches=0.02)
        plt.close()

    # ---- Figure 3: 1x2 grid for KCIPT with B=20000, saved as kcipt_20000_ps.pdf ----
    if True:
        # Same style settings as above, applied again for this figure.
        sns.set(style='white', font_scale=1.2)
        paper_rc = {'lines.linewidth': 0.8, 'lines.markersize': 2, 'patch.linewidth': 1}
        sns.set_context("paper", rc=paper_rc)
        plt.rc('text', usetex=True)
        plt.rc('text.latex', preamble=r'\usepackage{cmbright}')
        fig = plt.figure(figsize=[5, 1.6])

        # Left subplot: fitted SDCIT null, the gamma-approximated KCIPT null for
        # B=20000, and a histogram of the B=20000 KCIPT statistics.
        fig.add_subplot(1, 2, 1, adjustable='box')
        plt.plot(xs_sdcit, ys_sdcit_pearson3, label='SDCIT null', lw=1.5, color=color_palettes[method_color_codes['SDCIT']])
        plt.plot(xs_kcipt, ys_kcipt_20000_gamma, label='KCIPT null', lw=1.5, color=color_palettes[method_color_codes['KCIPT']])
        sns.distplot(df_kcipt_20000['statistic'], hist=True, bins=20, kde=False, norm_hist=True, color=color_palettes[method_color_codes['KCIPT']], label='KCIPT TS')
        plt.legend(loc=1)
        plt.gca().set_xlabel('MMD')
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.gca().set_ylabel('density')
        plt.gca().set_xlim([-0.0002, 0.0003])
        plt.setp(plt.gca(), 'yticklabels', [])

        # Right subplot: the recorded KCIPT p-values, and (in black) p-values of
        # the same KCIPT statistics recomputed against the SDCIT null sample.
        fig.add_subplot(1, 2, 2, adjustable='box')
        sns.distplot(df_kcipt_20000['pvalue'], hist=True, bins=20, kde=False, color=color_palettes[method_color_codes['KCIPT']], norm_hist=True, label='KCIPT p')
        sns.distplot([p_value_of(ss, sdcit_null) for ss in df_kcipt_20000['statistic']], hist=True, bins=20, kde=False, color='k', norm_hist=True, label='KCIPT p on SDCIT null')
        plt.gca().set_xlabel('p-value')
        plt.gcf().subplots_adjust(wspace=0.3, hspace=0.3)
        plt.legend(loc=0)

        sns.despine()
        plt.savefig(SDCIT_FIGURE_DIR + '/kcipt_20000_ps.pdf', transparent=True, bbox_inches='tight', pad_inches=0.02)
        plt.close()
 def precip_test_pearson(self, start, end):
     """Kolmogorov-Smirnov test of the precipitation sample against a fitted Pearson III.

     The sample is fetched from ``self.get_precip(start, end)`` exactly once, so
     the distribution parameters are fitted to the very same data that is then
     tested (and the accessor is not invoked twice).

     Args:
         start: Forwarded unchanged to ``self.get_precip``.
         end: Forwarded unchanged to ``self.get_precip``.

     Returns:
         The object returned by ``kstest`` for the sample against ``"pearson3"``
         with the fitted parameters.

     NOTE(review): the parameters are estimated from the tested sample itself,
     so the reported p-value is likely optimistic compared with a test against
     a fully pre-specified distribution -- confirm this is acceptable.
     """
     precip = self.get_precip(start, end)
     args = pearson3.fit(precip)
     return kstest(precip, "pearson3", args=args)
# Box 8.1
import numpy as np
import pandas as pd
#from scipy.stats import describe
from scipy.stats import pearson3
#import matplotlib.pyplot as plt
#import statsmodels.formula.api as sm
#import statsmodels.tsa.stattools as ts
#import statsmodels.tsa.vector_ar.vecm as vm
from scipy.optimize import minimize

# Load the return data from a CSV in the current working directory.
# NOTE(review): the whole DataFrame is handed to pearson3.fit below, so the file
# presumably holds a single numeric column -- confirm.
df = pd.read_csv('AUDCAD_unequal_ret.csv')

# Fit a Pearson type III distribution; the three fitted values are unpacked as
# (skew, loc, scale) and reused for the moments and the simulation below.
skew_, loc_, scale_ = pearson3.fit(df)
print('skew=%f loc=%f scale=%f' % (skew_, loc_, scale_))
# NOTE(review): the moments string is 'mvks' rather than the conventional
# 'mvsk'; the unpacking assumes the result order mean, variance, skew, kurtosis
# -- confirm against the scipy documentation.
mean, var, skew, kurt = pearson3.stats(skew_, loc_, scale_, moments='mvks')
print('mean=%f var=%f skew=%f kurt=%f' % (mean, var, skew, kurt))

# Draw 100000 simulated returns from the fitted distribution; random_state=0
# fixes the seed so the draw is reproducible.
ret_sim = pearson3.rvs(skew_, loc_, scale_, size=100000, random_state=0)


def g(f, R):
    """Return the average of ``log(1 + f * R)`` taken over the first axis of R.

    Args:
        f: Multiplier applied to every element of ``R`` (presumably a leverage
            factor -- confirm with the caller).
        R: Array-like exposing ``.shape``; the sum runs over axis 0 and is
            divided by ``R.shape[0]``.

    Returns:
        A scalar for 1-D ``R``, otherwise an array with axis 0 reduced.
    """
    log_growth = np.log(1 + f * R)
    total = np.sum(log_growth, axis=0)
    n_obs = R.shape[0]
    return total / n_obs


# Evaluate g on the simulated returns for every integer f from 1 to 23.
myf = range(1, 24)
# myg is indexed by f itself, so slot 0 is never written and stays NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
myg = np.full(24, np.nan)

for f in myf:
    myg[f] = g(f, ret_sim)
 def discharge_test_pearson(self, start, end):
     """Kolmogorov-Smirnov test of the discharge sample against a fitted Pearson III.

     The sample is fetched from ``self.get_discharge(start, end)`` exactly once,
     so the distribution parameters are fitted to the very same data that is
     then tested (and the accessor is not invoked twice).

     Args:
         start: Forwarded unchanged to ``self.get_discharge``.
         end: Forwarded unchanged to ``self.get_discharge``.

     Returns:
         The object returned by ``kstest`` for the sample against ``"pearson3"``
         with the fitted parameters.

     NOTE(review): the parameters are estimated from the tested sample itself,
     so the reported p-value is likely optimistic compared with a test against
     a fully pre-specified distribution -- confirm this is acceptable.
     """
     discharge = self.get_discharge(start, end)
     args = pearson3.fit(discharge)
     return kstest(discharge, "pearson3", args=args)
# Example no. 5
# 0
# Period-over-period percentage change of the 'Close' column; the first element
# is NaN because it has no predecessor.
marketRet = df['Close'].pct_change()

# Positions are shifted by one row before being multiplied with the returns,
# then NaN-ignoring row sums are divided by capital and holddays.
# NOTE(review): pos, capital and holddays are defined outside this excerpt;
# the shapes of pos and marketRet must broadcast -- confirm.
ret = np.nansum(np.array(pd.DataFrame(pos).shift()) * np.array(marketRet),
                axis=1) / capital / holddays

# Test statistic: sqrt(number of observations) * mean / standard deviation,
# with NaNs ignored in the mean and the standard deviation.
sharpe = np.sqrt(len(ret)) * np.nanmean(ret) / np.nanstd(ret)

print("Gaussian Test statistic=%f" % sharpe)
# Value recorded by the original author: Gaussian Test statistic=2.769741

# Randomized market returns hypothesis test
# =============================================================================
# Earlier approach kept for reference:
#_,_,mean,var,skew,kurt=describe(marketRet, nan_policy='omit')
# =============================================================================
# Fit a Pearson type III distribution to the observed returns; the fitted
# (skew, loc, scale) parameterise the simulated return series drawn below.
skew_, loc_, scale_ = pearson3.fit(marketRet[1:])  # First element is NaN
# Counter, initialised to zero before the simulation loop that follows.
numSampleAvgretBetterOrEqualObserved = 0
for sample in range(10000):
    marketRet_sim = pearson3.rvs(skew=skew_,
                                 loc=loc_,
                                 scale=scale_,
                                 size=marketRet.shape[0],
                                 random_state=sample)
    cl_sim = np.cumproduct(1 + marketRet_sim) - 1

    longs_sim = cl_sim > pd.Series(cl_sim).shift(lookback)
    shorts_sim = cl_sim < pd.Series(cl_sim).shift(lookback)

    pos_sim = np.zeros(cl_sim.shape[0])

    for h in range(0, holddays):