def symbol_statistics(start_date=dt.date(1990, 1, 1), end_date=dt.date(2017, 12, 31)):
    """
    Statistics of the daily simple returns of the DJIA component stocks.

    For every symbol in DJIA_symbols_20170901.json, compute cumulative and
    annualized ROI, the first four moments, Sharpe and Sortino ratios, the
    Jarque-Bera p-value, the worst-case ADF p-value over four regression
    variants, and the worst-case SPA p-value over repeated bootstraps, then
    write one CSV row per symbol to pp.TMP_DIR/DJIA_symbols_20170901_stat.csv.

    Parameters
    ----------
    start_date, end_date : datetime.date
        Closed interval over which the return series is sliced.
    """
    import csv
    import json
    import statsmodels.tsa.stattools as tsa_tools
    import scipy.stats as spstats
    import portfolio_programming.statistics.risk_adjusted as risk_adj
    import arch.bootstrap.multiple_comparison as arch_comp

    # close the JSON file deterministically (the previous version leaked
    # the handle by passing open(...) straight into json.load)
    with open(os.path.join(pp.DATA_DIR,
                           'DJIA_symbols_20170901.json')) as json_fin:
        symbols = json.load(json_fin)
    data_xarr = xr.open_dataarray(
        os.path.join(pp.DATA_DIR, 'DJIA_symbols_20170901.nc'))

    # BUG FIX: the annualization horizon was hard-coded to 10 years even
    # though the date range is a parameter (the defaults span 28 years).
    # Derive it from the interval, as market_index_statistics() does.
    year_count = end_date.year - start_date.year + 1

    with open(os.path.join(pp.TMP_DIR,
                           'DJIA_symbols_20170901_stat.csv'), 'w') as csv_file:
        fields = ["rank", 'symbol', 'start_date', 'end_date', "n_data",
                  "cum_roi", "annual_roi", "roi_mu", "std", "skew",
                  "ex_kurt", "Sharpe", "Sortino", "JB", "worst_ADF",
                  "SPA_c"]
        writer = csv.DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()

        for sdx, symbol in enumerate(symbols):
            rois = data_xarr.loc[start_date:end_date, symbol, 'simple_roi']
            trans_dates = rois.get_index('trans_date')
            rois = rois.data                 # to numpy
            rois = rois[~np.isnan(rois)]     # filter the nan
            n_roi = len(rois)
            if n_roi == 0:
                # no observations in the interval; skip instead of
                # crashing on rois[0] below
                print("[{}/{}] {}, no data, skipped".format(
                    sdx + 1, len(symbols), symbol))
                continue
            # zero the first observation of the interval
            rois[0] = 0
            cumulative_roi = float((1 + rois).prod() - 1)
            annual_roi = float(
                np.power(cumulative_roi + 1, 1. / year_count) - 1)
            sharpe = risk_adj.Sharpe(rois)
            sortino = risk_adj.Sortino_full(rois)[0]
            jb = spstats.jarque_bera(rois)[1]

            # worst (largest) ADF p-value over the four regression variants
            adf_c = tsa_tools.adfuller(rois, regression='c')[1]
            adf_ct = tsa_tools.adfuller(rois, regression='ct')[1]
            adf_ctt = tsa_tools.adfuller(rois, regression='ctt')[1]
            adf_nc = tsa_tools.adfuller(rois, regression='nc')[1]
            adf = max(adf_c, adf_ct, adf_ctt, adf_nc)

            # worst SPA p-value over several independent bootstrap runs
            spa_value = 0
            for _ in range(5):
                spa = arch_comp.SPA(rois, np.zeros(n_roi), reps=1000)
                spa.seed(np.random.randint(0, 2 ** 31 - 1))
                spa.compute()
                # preserve the worse p_value
                if spa.pvalues[1] > spa_value:
                    spa_value = spa.pvalues[1]

            writer.writerow({
                "rank": sdx + 1,
                "symbol": symbol,
                "start_date": trans_dates[0].strftime("%Y-%m-%d"),
                "end_date": trans_dates[-1].strftime("%Y-%m-%d"),
                "n_data": n_roi,
                "cum_roi": cumulative_roi,
                "annual_roi": annual_roi,
                "roi_mu": float(rois.mean()),
                "std": float(rois.std(ddof=1)),
                "skew": spstats.skew(rois, bias=False),
                "ex_kurt": spstats.kurtosis(rois, bias=False),
                "Sharpe": sharpe,
                "Sortino": sortino,
                "JB": jb,
                "worst_ADF": adf,
                "SPA_c": spa_value,
            })
            print("[{}/{}] {}, cum_roi:{:.2%}".format(
                sdx + 1, len(symbols), symbol, cumulative_roi))
def get_poly_report(exp_type, report_dir=pp.WEIGHT_PORTFOLIO_REPORT_DIR):
    """
    Aggregate polynomial-aggregator portfolio reports into one CSV.

    Reads the report_{Poly|NIRPoly}_*.pkl files of every symbol group and
    poly power (2, 3, 4) and writes one row per report to
    pp.TMP_DIR/{exp_type}_stat.csv.  If a report lacks the "SPA_c" entry,
    the worst-case SPA p-value is computed from its wealth series and
    written back into the pickle so it is cached for subsequent runs.

    Parameters
    ----------
    exp_type : str
        One of 'poly', 'nir', 'nofee_poly', 'nofee_nir'.
    report_dir : str
        Directory containing the report pickles; ignored (overridden) for
        the two 'nofee_*' experiment types, which live in a fixed folder.

    Raises
    ------
    ValueError
        If *exp_type* is not one of the four recognized values.
    """
    import pickle
    import pandas as pd
    import csv
    import arch.bootstrap.multiple_comparison as arch_comp

    if exp_type not in ('poly', 'nir', 'nofee_poly', 'nofee_nir'):
        raise ValueError('unknown exp_type:', exp_type)

    # BUG FIX: report_dir used to be unconditionally overwritten, making
    # the parameter dead.  Only the no-fee experiments force a dedicated
    # directory; otherwise the caller-supplied report_dir is honored
    # (the default keeps the old behavior).
    if exp_type in ('nofee_poly', 'nofee_nir'):
        report_dir = os.path.join(pp.DATA_DIR,
                                  'report_weight_portfolio_nofee')

    group_names = pp.GROUP_SYMBOLS.keys()
    output_file = os.path.join(pp.TMP_DIR, "{}_stat.csv".format(exp_type))
    with open(output_file, "w", newline='') as csv_file:
        fields = [
            "simulation_name", "poly_power", "group_name",
            "start_date", "end_date", "n_data", "cum_roi", "annual_roi",
            "roi_mu", "std", "skew", "ex_kurt", "Sharpe", "Sortino_full",
            "Sortino_partial", "SPA_c"
        ]
        writer = csv.DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()

        # the two experiment families share the same file-name scheme and
        # only differ in the simulation prefix, so build the list once
        prefix = "Poly" if exp_type in ('poly', 'nofee_poly') else "NIRPoly"
        polys = ["{:.2f}".format(val) for val in (2, 3, 4)]
        report_pkls = [
            (group_name,
             "report_{}_{}_{}_20050103_20181228.pkl".format(
                 prefix, poly, group_name))
            for poly in polys
            for group_name in group_names
        ]

        for group_name, report_name in report_pkls:
            report_file = os.path.join(report_dir, report_name)
            rp = pd.read_pickle(report_file)

            # lazily compute the SPA p-value and cache it in the pickle
            if "SPA_c" not in rp.keys():
                rois = rp['decision_xarr'].loc[:, :, 'wealth'].sum(
                    axis=1).to_series().pct_change()
                # first pct_change value is NaN; zero it positionally
                # (.iloc avoids the deprecated label/position fallback)
                rois.iloc[0] = 0
                spa_value = 0
                for _ in range(3):
                    spa = arch_comp.SPA(rois.values, np.zeros(rois.size),
                                        reps=1000)
                    spa.seed(np.random.randint(0, 2 ** 31 - 1))
                    spa.compute()
                    # preserve the worse p_value
                    if spa.pvalues[1] > spa_value:
                        spa_value = spa.pvalues[1]
                rp['SPA_c'] = spa_value
                # write back to file
                with open(report_file, 'wb') as fout:
                    pickle.dump(rp, fout, pickle.HIGHEST_PROTOCOL)

            poly_power_value = rp.get('poly_power', 'adaptive')
            writer.writerow(
                {
                    "simulation_name": rp["simulation_name"],
                    "group_name": group_name,
                    "poly_power": poly_power_value,
                    "start_date": rp['exp_start_date'].strftime("%Y-%m-%d"),
                    "end_date": rp['exp_end_date'].strftime("%Y-%m-%d"),
                    "n_data": rp['n_exp_period'],
                    "cum_roi": rp['cum_roi'],
                    "annual_roi": rp['annual_roi'],
                    "roi_mu": rp['daily_mean_roi'],
                    "std": rp['daily_std_roi'],
                    "skew": rp['daily_skew_roi'],
                    "ex_kurt": rp['daily_ex-kurt_roi'],
                    "Sharpe": rp['Sharpe'],
                    "Sortino_full": rp['Sortino_full'],
                    "Sortino_partial": rp['Sortino_partial'],
                    "SPA_c": rp['SPA_c']
                }
            )
            print(
                "{} {}, cum_roi:{:.2%}".format(
                    rp["simulation_name"], group_name, rp['cum_roi']
                )
            )
def get_bah_report(report_dir=pp.WEIGHT_PORTFOLIO_REPORT_DIR):
    """
    Aggregate the buy-and-hold (BAH) portfolio reports into one CSV.

    For every symbol group, read report_BAH_{group}_20050103_20181228.pkl,
    recompute the worst-case SPA p-value from the wealth series, and write
    one row per group to pp.TMP_DIR/BAH_stat.csv.

    Parameters
    ----------
    report_dir : str
        Directory containing the report_BAH_*.pkl files.
    """
    # local imports, consistent with the sibling report helpers
    import csv
    import pandas as pd
    import arch.bootstrap.multiple_comparison as arch_comp

    group_names = pp.GROUP_SYMBOLS.keys()
    with open(os.path.join(pp.TMP_DIR, "BAH_stat.csv"), "w",
              newline='') as csv_file:
        fields = [
            "simulation_name", "group_name",
            "start_date", "end_date", "n_data", "cum_roi", "annual_roi",
            "roi_mu", "std", "skew", "ex_kurt", "Sharpe", "Sortino_full",
            "Sortino_partial", "SPA_c"
        ]
        writer = csv.DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()

        for gdx, group_name in enumerate(group_names):
            report_name = "report_BAH_{}_20050103_20181228.pkl".format(
                group_name)
            # BUG FIX: honor the report_dir parameter; it used to be
            # ignored in favor of pp.WEIGHT_PORTFOLIO_REPORT_DIR.
            rp = pd.read_pickle(os.path.join(report_dir, report_name))

            # daily simple returns from the aggregate wealth series
            rois = rp['decision_xarr'].loc[:, :, 'wealth'].sum(
                axis=1).to_series().pct_change()
            # first pct_change value is NaN; zero it positionally
            # (.iloc avoids the deprecated label/position fallback)
            rois.iloc[0] = 0

            # worst SPA p-value over several independent bootstrap runs
            spa_value = 0
            for _ in range(3):
                spa = arch_comp.SPA(rois.values, np.zeros(rois.size),
                                    reps=1000)
                spa.seed(np.random.randint(0, 2 ** 31 - 1))
                spa.compute()
                # preserve the worse p_value
                if spa.pvalues[1] > spa_value:
                    spa_value = spa.pvalues[1]

            writer.writerow(
                {
                    "simulation_name": rp["simulation_name"],
                    "group_name": group_name,
                    "start_date": rp['exp_start_date'].strftime("%Y-%m-%d"),
                    "end_date": rp['exp_end_date'].strftime("%Y-%m-%d"),
                    "n_data": rp['n_exp_period'],
                    "cum_roi": rp['cum_roi'],
                    "annual_roi": rp['annual_roi'],
                    "roi_mu": rp['daily_mean_roi'],
                    "std": rp['daily_std_roi'],
                    "skew": rp['daily_skew_roi'],
                    "ex_kurt": rp['daily_ex-kurt_roi'],
                    "Sharpe": rp['Sharpe'],
                    "Sortino_full": rp['Sortino_full'],
                    "Sortino_partial": rp['Sortino_partial'],
                    "SPA_c": spa_value
                }
            )
            print(
                "[{}/{}] {}, cum_roi:{:.2%}".format(
                    gdx + 1, len(group_names), group_name, rp['cum_roi']
                )
            )
def aggregating_reports(exp_name, setting, yearly=False):
    """
    Collect per-parameter SPSP_CVaR simulation reports into one DataArray.

    Builds the parameter grid for *exp_name* (interval x group x scenario
    set x portfolio size x window size x alpha x attribute), reads each
    pickled report listed by _all_spsp_cvar_params(), fills the grid, and
    writes it as a NetCDF file under pp.DATA_DIR.

    Parameters
    ----------
    exp_name : str
        'dissertation' or 'stocksp_cor15'.
    setting : str
        SPSP_CVaR setting, 'compact' or 'general'.
    yearly : bool
        If True, aggregate per-year intervals; otherwise one whole interval.

    Raises
    ------
    ValueError
        On an unrecognized exp_name or setting.
    """
    import arch.bootstrap.multiple_comparison as arch_comp

    if exp_name not in ('dissertation', 'stocksp_cor15'):
        raise ValueError('unknown exp_name:{}'.format(exp_name))
    if setting not in ("compact", "general"):
        raise ValueError("Unknown SPSP_CVaR setting: {}".format(setting))

    if exp_name == 'stocksp_cor15':
        if not yearly:
            # whole interval
            years = [[dt.date(2005, 1, 3), dt.date(2014, 12, 31)]]
            out_report_path = os.path.join(
                pp.DATA_DIR,
                "report_SPSP_CVaR_whole_{}_{}_{}_{}.nc".format(
                    exp_name, setting,
                    years[0][0].strftime("%Y%m%d"),
                    years[0][1].strftime("%Y%m%d")))
        else:
            # yearly intervals: first/last trading day of each year
            years = [[dt.date(2005, 1, 3), dt.date(2005, 12, 30)],
                     [dt.date(2006, 1, 2), dt.date(2006, 12, 29)],
                     [dt.date(2007, 1, 2), dt.date(2007, 12, 31)],
                     [dt.date(2008, 1, 2), dt.date(2008, 12, 31)],
                     [dt.date(2009, 1, 5), dt.date(2009, 12, 31)],
                     [dt.date(2010, 1, 4), dt.date(2010, 12, 31)],
                     [dt.date(2011, 1, 3), dt.date(2011, 12, 30)],
                     [dt.date(2012, 1, 2), dt.date(2012, 12, 28)],
                     [dt.date(2013, 1, 2), dt.date(2013, 12, 31)],
                     [dt.date(2014, 1, 2), dt.date(2014, 12, 31)]
                     ]
            out_report_path = os.path.join(pp.DATA_DIR,
                                           "report_SPSP_CVaR_yearly_{}_{}_{}_{"
                                           "}.nc".format(
                                               exp_name, setting,
                                               years[0][0].strftime("%Y%m%d"),
                                               years[-1][1].strftime(
                                                   "%Y%m%d")))
        intervals = ["{}_{}".format(s.strftime("%Y%m%d"),
                                    e.strftime("%Y%m%d"))
                     for s, e in years]
        set_indices = [1, 2, 3]
        # NOTE(review): group_names is empty here, which gives the
        # "group_name" dimension of report_xarr length 0 — confirm that
        # _all_spsp_cvar_params() supplies a grp value compatible with this.
        group_names = []
        max_portfolio_sizes = range(5, 50 + 5, 5)
        window_sizes = range(60, 240 + 10, 10)
        n_scenarios = [200, ]
        alphas = ["{:.2f}".format(v / 100.)
                  for v in range(50, 100, 5)]
    elif exp_name == 'dissertation':
        if not yearly:
            # whole interval
            years = [[dt.date(2005, 1, 3), dt.date(2018, 12, 28)]]
            out_report_path = os.path.join(
                pp.DATA_DIR,
                "report_SPSP_CVaR_whole_{}_{}_{}_{}.nc".format(
                    exp_name, setting,
                    years[0][0].strftime("%Y%m%d"),
                    years[0][1].strftime("%Y%m%d")))
        else:
            # yearly interval
            # NOTE(review): this list stops at 2017 although the whole
            # interval above ends 2018-12-28 — confirm 2018 is
            # intentionally excluded from the yearly aggregation.
            years = [[dt.date(2005, 1, 3), dt.date(2005, 12, 30)],
                     [dt.date(2006, 1, 2), dt.date(2006, 12, 29)],
                     [dt.date(2007, 1, 2), dt.date(2007, 12, 31)],
                     [dt.date(2008, 1, 2), dt.date(2008, 12, 31)],
                     [dt.date(2009, 1, 5), dt.date(2009, 12, 31)],
                     [dt.date(2010, 1, 4), dt.date(2010, 12, 31)],
                     [dt.date(2011, 1, 3), dt.date(2011, 12, 30)],
                     [dt.date(2012, 1, 2), dt.date(2012, 12, 28)],
                     [dt.date(2013, 1, 2), dt.date(2013, 12, 31)],
                     [dt.date(2014, 1, 2), dt.date(2014, 12, 31)],
                     [dt.date(2015, 1, 5), dt.date(2015, 12, 31)],
                     [dt.date(2016, 1, 4), dt.date(2016, 12, 30)],
                     [dt.date(2017, 1, 3), dt.date(2017, 12, 29)]
                     ]
            out_report_path = os.path.join(pp.DATA_DIR,
                                           "report_SPSP_CVaR_yearly_{}_{}_{}_{"
                                           "}.nc".format(
                                               exp_name, setting,
                                               years[0][0].strftime("%Y%m%d"),
                                               years[-1][1].strftime(
                                                   "%Y%m%d")))
        intervals = ["{}_{}".format(s.strftime("%Y%m%d"),
                                    e.strftime("%Y%m%d"))
                     for s, e in years]
        set_indices = [1, ]
        group_names = list(pp.GROUP_SYMBOLS.keys())
        max_portfolio_sizes = [5, ]
        window_sizes = range(50, 240 + 10, 10)
        n_scenarios = [1000, ]
        alphas = ["{:.2f}".format(v / 100.)
                  for v in range(50, 100, 5)]

    # attributes copied verbatim from each report pickle
    originals = [
        'initial_wealth', 'final_wealth',
        'cum_roi', 'daily_roi', 'daily_mean_roi', 'daily_std_roi',
        'daily_skew_roi',
        'daily_ex-kurt_roi', 'Sharpe', 'Sortino_full', 'Sortino_partial'
    ]
    # additional attributes
    additionals = ['annual_roi', 'daily_VSS', 'SPA_c']
    attributes = originals + additionals

    # result container: zero-initialized 7-D grid, one cell per
    # (interval, group, scenario set, size, window, alpha, attribute)
    report_xarr = xr.DataArray(
        np.zeros((len(years), len(group_names), len(set_indices),
                  len(max_portfolio_sizes), len(window_sizes),
                  len(alphas), len(attributes)
                  )),
        dims=("interval", "group_name", "scenario_set_idx",
              "max_portfolio_size", "rolling_window_size", "alpha",
              "attribute"),
        coords=(intervals, group_names, set_indices, max_portfolio_sizes,
                window_sizes, alphas, attributes)
    )

    t0 = time()
    # key: report_name, value: parameters
    report_dict = _all_spsp_cvar_params(exp_name, setting, yearly)
    report_count = 0
    no_report_count_params = []
    no_report_count = 0
    parent_dir = pp.REPORT_DIR
    for idx, (name, param) in enumerate(report_dict.items()):
        t1 = time()
        path = os.path.join(parent_dir, name)
        print(path)
        # NOTE: exp_name/setting are rebound from the param tuple here,
        # shadowing the function arguments for the rest of the loop.
        exp_name, setting, grp, m, h, s, a, sdx, s_date, e_date = param
        interval = "{}_{}".format(s_date.strftime("%Y%m%d"),
                                  e_date.strftime("%Y%m%d"))
        alpha = "{:.2f}".format(a)
        try:
            report = pd.read_pickle(path)
            for attr in originals:
                report_xarr.loc[
                    interval, grp, sdx, m, h, alpha, attr] = report[attr]
            # annualization horizon derived from the report's own interval
            year_count = (e_date.year - s_date.year) + 1
            for attr in additionals:
                if attr == 'annual_roi':
                    val = np.power(report['cum_roi'] + 1,
                                   1. / year_count) - 1
                elif attr == 'daily_VSS':
                    # mean value of the stochastic solution, relative to
                    # the initial wealth
                    risks = report['estimated_risk_xarr']
                    val = float(risks.loc[:, 'VSS'].mean() / report[
                        'initial_wealth'])
                elif attr == 'SPA_c':
                    # worst SPA p-value over 3 bootstrap runs on the
                    # daily returns of the aggregate wealth series
                    dec_xarr = report['decision_xarr']
                    wealth_arr = dec_xarr.loc[:, :, 'wealth'].sum(
                        axis=1).to_series()
                    rois = wealth_arr.pct_change()
                    rois[0] = 0
                    # print(rois.values)
                    spa_value = 0
                    for _ in range(3):
                        spa = arch_comp.SPA(rois.values,
                                            np.zeros(rois.size),
                                            reps=1000)
                        spa.seed(np.random.randint(0, 2 ** 31 - 1))
                        spa.compute()
                        # preserve the worse p_value
                        if spa.pvalues[1] > spa_value:
                            spa_value = spa.pvalues[1]
                    val = spa_value
                report_xarr.loc[
                    interval, grp, sdx, m, h, alpha, attr] = val
            report_count += 1
            print("[{}/{}] {} {:.2%} elapsed:{:.2f}/{:.2f} secs".format(
                idx + 1, len(report_dict),
                report['simulation_name'], report['cum_roi'],
                time() - t1, time() - t0
            ))
        except FileNotFoundError:
            # missing report: record it and keep aggregating
            no_report_count_params.append(name)
            no_report_count += 1
            continue
        except Exception as e:
            # any other failure aborts the whole aggregation
            print("{} Error: {}".format(name, e))
            sys.exit(-1)

    for rp in no_report_count_params:
        print("no data:", rp)
    print("report count:{}, no report count:{}".format(
        report_count, no_report_count))

    report_xarr.to_netcdf(out_report_path)
def get_nr_spsp_cvar_report(report_dir=pp.NRSPSPCVaR_DIR):
    """
    Aggregate no-regret SPSP_CVaR reports into a single CSV.

    For both regret types ('external' -> NR_SPSP_CVaR reports,
    'internal' -> NIR_SPSP_CVaR reports) this builds the expected report
    file names from the per-group expert parameters, reads each pickle
    found under *report_dir*, recomputes a worst-case SPA p-value from the
    main portfolio's wealth series, and appends one row per report to
    pp.TMP_DIR/nr_spsp_cvar_stat.csv.

    Parameters
    ----------
    report_dir : str
        Directory containing the NR/NIR SPSP_CVaR report pickles.
    """
    import csv
    import pandas as pd
    import arch.bootstrap.multiple_comparison as arch_comp

    # per-group expert grid: window-size range and alpha range encoded as
    # "h<start>-<stop>-<step>_a<start>-<stop>-<step>"
    group_params = {
        'TWG1': 'h140-200-10_a85-95-5',
        'TWG2': 'h190-240-10_a55-75-5',
        'TWG3': 'h60-100-10_a75-90-5',
        'TWG4': 'h100-140-10_a55-75-5',
        'TWG5': 'h60-90-10_a50-75-5',
        'TWG6': 'h200-240-10_a50-70-5',
        'USG1': 'h200-240-10_a50-65-5',
        'USG2': 'h170-240-10_a50-70-5',
        'USG3': 'h170-220-10_a80-95-5',
        'USG4': 'h60-90-10_a75-90-5',
        'USG5': 'h80-130-10_a75-90-5',
        'USG6': 'h180-240-10_a50-70-5'
    }
    set_indices = (1, )
    n_scenarios = (1000, )
    years = [(dt.date(2005, 1, 3), dt.date(2018, 12, 28))]
    stat_file = os.path.join(pp.TMP_DIR, "nr_spsp_cvar_stat.csv")

    # NOTE(review): the CSV is opened in append mode inside this loop and
    # writeheader() is called each iteration, so the output contains one
    # header per regret type (and grows across re-runs) — confirm intended.
    for regret_type in ('external', 'internal'):
        if regret_type == 'external':
            REPORT_FORMAT = "report_NR_SPSP_CVaR_{nr_strategy}_{nr_strategy_param:.2f}_{group_name}_{expert_group_name}_s{n_scenario}_sdx{scenario_set_idx}_{exp_start_date}_{exp_end_date}.pkl"
            # strategy_params = [["{}{:.2f}-SPSP".format(s, p), s, p]
            #                    for s in ('EG', 'EXP')
            #                    for p in (0.01, 0.1, 1)]
            strategy_params = [["{}{:.2f}-SPSP".format(s, p), s, p]
                               for s in ('EXP', )
                               for p in (0.01, )]
            strategy_params.extend([["POL{:.1f}-SPSP".format(p), s, p]
                                    for s in ('POLY', )
                                    for p in (2, )])
        elif regret_type == 'internal':
            REPORT_FORMAT = "report_NIR_SPSP_CVaR_{nr_strategy}_{nr_strategy_param:.2f}_{group_name}_{expert_group_name}_s{n_scenario}_sdx{scenario_set_idx}_{exp_start_date}_{exp_end_date}.pkl"
            strategy_params = [["B1EXP{:.2f}-SPSP".format(p), s, p]
                               for s in ('EXP', )
                               for p in (0.01, )]
            strategy_params.extend([["B1POL{:.1f}-SPSP".format(p), s, p]
                                    for s in ('POLY', )
                                    for p in (2, )])
        else:
            raise ValueError('unknown regret type:', regret_type)

        # (display name, expected pickle file name) for every combination
        # of strategy, group, scenario count, set index and interval
        report_files = [[
            s_name,
            REPORT_FORMAT.format(nr_strategy=s,
                                 nr_strategy_param=p,
                                 group_name=group_name,
                                 expert_group_name=exp_group_name,
                                 n_scenario=n_scenario,
                                 scenario_set_idx=sdx,
                                 exp_start_date=s_date.strftime("%Y%m%d"),
                                 exp_end_date=e_date.strftime("%Y%m%d"))
        ]
            for s_name, s, p in strategy_params
            for group_name, exp_group_name in group_params.items()
            for n_scenario in n_scenarios
            for sdx in set_indices
            for s_date, e_date in years]

        with open(stat_file, "a", newline='') as csv_file:
            fields = [
                "simulation_name", "s_name", "group_name", 'expert_group',
                "n_expert", 'n_scenario', 'sdx',
                "start_date", "end_date", "n_data", "cum_roi", "annual_roi",
                "roi_mu", "std", "skew", "ex_kurt", "Sharpe", "Sortino_full",
                "Sortino_partial", "SPA_c",
            ]
            writer = csv.DictWriter(csv_file, fieldnames=fields)
            writer.writeheader()
            not_exist_reports = []
            for gdx, (s_name, report_file) in enumerate(report_files):
                try:
                    rp = pd.read_pickle(os.path.join(report_dir,
                                                     report_file))
                except FileNotFoundError as _:
                    # missing report: remember it and continue
                    not_exist_reports.append(report_file)
                    continue

                # positional parse of the simulation name; params[8] is
                # expected to hold 's<n_scenario>' and params[9] 'sdx<i>'
                params = rp["simulation_name"].split('_')

                # daily returns of the main portfolio's wealth series
                rois = rp['portfolio_xarr'].loc[:, 'main',
                                                'wealth'].to_series(
                ).pct_change()
                rois[0] = 0

                # worst SPA p-value over 3 bootstrap runs
                spa_value = 0
                for _ in range(3):
                    spa = arch_comp.SPA(rois.values, np.zeros(rois.size),
                                        reps=1000)
                    spa.seed(np.random.randint(0, 2**31 - 1))
                    spa.compute()
                    # preserve the worse p_value
                    if spa.pvalues[1] > spa_value:
                        spa_value = spa.pvalues[1]

                writer.writerow({
                    "simulation_name": rp["simulation_name"],
                    "s_name": s_name,
                    'expert_group': rp['expert_group_name'],
                    "n_expert": len(rp['experts']),
                    "group_name": rp['group_name'],
                    'n_scenario': params[8][1:],
                    'sdx': params[9][-1],
                    "start_date": rp['exp_start_date'].strftime("%Y-%m-%d"),
                    "end_date": rp['exp_end_date'].strftime("%Y-%m-%d"),
                    "n_data": rp['n_exp_period'],
                    "cum_roi": rp['cum_roi'],
                    # 14-year horizon matches the 2005-2018 interval above
                    "annual_roi": np.power(rp['cum_roi'] + 1,
                                           1 / 14) - 1,
                    "roi_mu": rp['daily_mean_roi'],
                    "std": rp['daily_std_roi'],
                    "skew": rp['daily_skew_roi'],
                    "ex_kurt": rp['daily_ex-kurt_roi'],
                    "Sharpe": rp['Sharpe'],
                    "Sortino_full": rp['Sortino_full'],
                    "Sortino_partial": rp['Sortino_partial'],
                    "SPA_c": spa_value
                })
                print("[{}/{}] {}, cum_roi:{:.2%}".format(
                    gdx + 1, len(report_files),
                    rp["simulation_name"], rp['cum_roi']))
            print(report_dir)
            print(not_exist_reports)
def market_index_statistics():
    """
    Statistics of the TAIEX and DJIA market indices, 2005-2018.

    The market index is taken to be the last symbol of each market's
    symbol JSON file.  For each index, compute cumulative/annualized ROI,
    moments, Sharpe and Sortino ratios, the worst-case SPA p-value over
    10 bootstrap runs, the Jarque-Bera p-value and the worst-case ADF
    p-value, and write one CSV row per index to
    pp.TMP_DIR/market_index_stat.csv.
    """
    import csv
    import json
    import statsmodels.tsa.stattools as tsa_tools
    import scipy.stats as spstats
    import portfolio_programming.statistics.risk_adjusted as risk_adj
    import arch.bootstrap.multiple_comparison as arch_comp

    start_date = dt.date(2005, 1, 1)
    end_date = dt.date(2018, 12, 31)

    # the market index is stored as the last entry of each symbol list
    with open(pp.TAIEX_2005_MKT_CAP_50_SYMBOL_JSON) as tw_fin:
        tw_symbols = json.load(tw_fin)
    tw_xarr = xr.open_dataarray(pp.TAIEX_2005_MKT_CAP_NC)
    taiex_mkt_idx = tw_symbols[-1]

    with open(pp.DJIA_2005_SYMBOL_JSON) as us_fin:
        djia_symbols = json.load(us_fin)
    djia_xarr = xr.open_dataarray(pp.DJIA_2005_NC)
    djia_mkt_idx = djia_symbols[-1]

    mkt_stats_file = os.path.join(pp.TMP_DIR, "market_index_stat.csv")
    with open(mkt_stats_file, "w", ) as csv_file:
        fields = [
            "symbol", "start_date", "end_date", "n_data",
            "cum_roi", "annual_roi", "roi_mu", "std", "skew", "ex_kurt",
            "Sharpe", "Sortino", "SPA_c", "JB", "worst_ADF"
        ]
        writer = csv.DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()
        for mkt_symbol, data_xarr in zip([taiex_mkt_idx, djia_mkt_idx],
                                         [tw_xarr, djia_xarr]):
            t0 = time()
            print(mkt_symbol)
            rois = data_xarr.loc[start_date:end_date, mkt_symbol,
                                 "simple_roi"]
            trans_dates = rois.get_index("trans_date")
            # count() skips NaNs, but the series itself is NOT NaN-filtered
            # below — NOTE(review): unlike symbol_statistics, any NaN would
            # propagate into prod()/adfuller; presumably the index series
            # has no missing values — confirm.
            n_roi = int(rois.count())
            # zero the first observation of the interval
            rois[0] = 0
            cumulative_roi = float((1 + rois).prod() - 1)
            # annualize over the number of calendar years in the interval
            annual_roi = float(np.power(cumulative_roi + 1, 1.0 / (
                    end_date.year - start_date.year + 1)) - 1)
            sharpe = risk_adj.Sharpe(rois)
            sortino = risk_adj.Sortino_full(rois)[0]
            jb = spstats.jarque_bera(rois)[1]

            # worse case of adf: largest p-value over the four variants
            adf_c = tsa_tools.adfuller(rois, regression="c")[1]
            adf_ct = tsa_tools.adfuller(rois, regression="ct")[1]
            adf_ctt = tsa_tools.adfuller(rois, regression="ctt")[1]
            adf_nc = tsa_tools.adfuller(rois, regression="nc")[1]
            adf = max(adf_c, adf_ct, adf_ctt, adf_nc)

            # worse case of SPA over 10 bootstrap runs
            spa_value = 0
            for _ in range(10):
                spa = arch_comp.SPA(rois.data, np.zeros(rois.size),
                                    reps=1000)
                spa.seed(np.random.randint(0, 2 ** 31 - 1))
                spa.compute()
                # preserve the worse p_value
                if spa.pvalues[1] > spa_value:
                    spa_value = spa.pvalues[1]

            writer.writerow({
                "symbol": mkt_symbol,
                "start_date": trans_dates[0].strftime("%Y-%m-%d"),
                "end_date": trans_dates[-1].strftime("%Y-%m-%d"),
                "n_data": n_roi,
                "cum_roi": cumulative_roi,
                "annual_roi": annual_roi,
                "roi_mu": float(rois.mean()),
                "std": float(rois.std(ddof=1)),
                "skew": spstats.skew(rois, bias=False),
                "ex_kurt": spstats.kurtosis(rois, bias=False),
                "Sharpe": sharpe,
                "Sortino": sortino,
                "SPA_c": spa_value,
                "JB": jb,
                "worst_ADF": adf,
            }
            )
            print(
                "{} cum_roi:{:.2%} {:.4f} secs".format(
                    mkt_symbol, cumulative_roi, time() - t0
                )
            )
def symbol_statistics(exp_name):
    """
    Statistics of the daily simple returns of the candidate stocks of an
    experiment.

    NOTE(review): this redefines symbol_statistics and therefore shadows
    the earlier two-date-parameter definition in this module — confirm the
    older definition is dead code.

    Parameters
    ----------
    exp_name : str
        'stocksp_cor15' -> TAIEX 50-largest market-cap stocks, 2005-2014,
        written to TAIEX_20050103_50largest_listed_market_cap_stat.csv;
        'dissertation' -> TAIEX and DJIA stocks grouped 5-per-group
        (TWG1..TWG6, USG1..USG6), 2005-2018, one CSV per market.

    Raises
    ------
    ValueError
        On an unrecognized exp_name.
    """
    import csv
    import json
    import statsmodels.tsa.stattools as tsa_tools
    import scipy.stats as spstats
    import portfolio_programming.statistics.risk_adjusted as risk_adj
    import arch.bootstrap.multiple_comparison as arch_comp

    if exp_name == 'stocksp_cor15':
        start_date = dt.date(2005, 1, 1)
        end_date = dt.date(2014, 12, 31)
        with open(pp.TAIEX_2005_MKT_CAP_50_SYMBOL_JSON) as fin:
            symbols = json.load(fin)
        data_xarr = xr.open_dataarray(pp.TAIEX_2005_MKT_CAP_NC)
        with open(
                os.path.join(
                    pp.TMP_DIR,
                    "TAIEX_20050103_50largest_listed_market_cap_stat.csv"),
                "w",
        ) as csv_file:
            fields = [
                "rank", "symbol", "start_date", "end_date", "n_data",
                "cum_roi", "annual_roi", "roi_mu", "std", "skew",
                "ex_kurt", "Sharpe", "Sortino", "JB", "worst_ADF",
                "SPA_c",
            ]
            writer = csv.DictWriter(csv_file, fieldnames=fields)
            writer.writeheader()
            for sdx, symbol in enumerate(symbols):
                rois = data_xarr.loc[start_date:end_date, symbol,
                                     "simple_roi"]
                trans_dates = rois.get_index("trans_date")
                # count() skips NaNs; the series itself is not filtered
                n_roi = int(rois.count())
                # zero the first observation of the interval
                rois[0] = 0
                cumulative_roi = float((1 + rois).prod() - 1)
                # 10-year horizon matches the 2005-2014 interval above
                annual_roi = float(np.power(cumulative_roi + 1,
                                            1.0 / 10) - 1)
                sharpe = risk_adj.Sharpe(rois)
                sortino = risk_adj.Sortino_full(rois)[0]
                jb = spstats.jarque_bera(rois)[1]

                # worse case of adf: largest p-value over four variants
                adf_c = tsa_tools.adfuller(rois, regression="c")[1]
                adf_ct = tsa_tools.adfuller(rois, regression="ct")[1]
                adf_ctt = tsa_tools.adfuller(rois, regression="ctt")[1]
                adf_nc = tsa_tools.adfuller(rois, regression="nc")[1]
                adf = max(adf_c, adf_ct, adf_ctt, adf_nc)

                # worst SPA p-value over 10 bootstrap runs
                spa_value = 0
                for _ in range(10):
                    spa = arch_comp.SPA(rois.data, np.zeros(rois.size),
                                        reps=1000)
                    spa.seed(np.random.randint(0, 2 ** 31 - 1))
                    spa.compute()
                    # preserve the worse p_value
                    if spa.pvalues[1] > spa_value:
                        spa_value = spa.pvalues[1]

                writer.writerow(
                    {
                        "rank": sdx + 1,
                        "symbol": symbol,
                        "start_date": trans_dates[0].strftime("%Y-%m-%d"),
                        "end_date": trans_dates[-1].strftime("%Y-%m-%d"),
                        "n_data": n_roi,
                        "cum_roi": cumulative_roi,
                        "annual_roi": annual_roi,
                        "roi_mu": float(rois.mean()),
                        "std": float(rois.std(ddof=1)),
                        "skew": spstats.skew(rois, bias=False),
                        "ex_kurt": spstats.kurtosis(rois, bias=False),
                        "Sharpe": sharpe,
                        "Sortino": sortino,
                        "JB": jb,
                        "worst_ADF": adf,
                        "SPA_c": spa_value,
                    }
                )
                print(
                    "[{}/{}] {}, cum_roi:{:.2%}".format(
                        sdx + 1, len(symbols), symbol, cumulative_roi
                    )
                )
    elif exp_name == 'dissertation':
        start_date = dt.date(2005, 1, 1)
        end_date = dt.date(2018, 12, 31)

        # TAIEX stocks, grouped five-per-group as TWG1..TWG6
        with open(pp.TAIEX_2005_MKT_CAP_50_SYMBOL_JSON) as tw_fin:
            tw_symbols = json.load(tw_fin)
        tw_xarr = xr.open_dataarray(pp.TAIEX_2005_MKT_CAP_NC)
        tw_stats_file = os.path.join(pp.TMP_DIR,
                                     "TAIEX_2005_market_cap_stat.csv")
        tw_group_symbols = zip(
            ['TWG{}'.format(idx // 5 + 1) for idx in range(30)],
            tw_symbols
        )

        # DJIA stocks, grouped five-per-group as USG1..USG6
        with open(pp.DJIA_2005_SYMBOL_JSON) as us_fin:
            djia_symbols = json.load(us_fin)
        djia_xarr = xr.open_dataarray(pp.DJIA_2005_NC)
        djia_stats_file = os.path.join(pp.TMP_DIR,
                                       "DJIA_2005_symbols_stat.csv")
        djia_group_symbols = zip(
            ['USG{}'.format(idx // 5 + 1) for idx in range(30)],
            djia_symbols
        )

        for mkt, group_symbols, data_xarr, stat_file in zip(
                ['djia', 'tw'],
                [djia_group_symbols, tw_group_symbols],
                [djia_xarr, tw_xarr],
                [djia_stats_file, tw_stats_file]):
            with open(stat_file, "w",) as csv_file:
                fields = [
                    "rank", 'group', "symbol", "start_date", "end_date",
                    "n_data", "cum_roi", "annual_roi", "roi_mu", "std",
                    "skew", "ex_kurt", "Sharpe", "Sortino", "SPA_c", "JB",
                    "worst_ADF"
                ]
                writer = csv.DictWriter(csv_file, fieldnames=fields)
                writer.writeheader()
                for sdx, (group, symbol) in enumerate(group_symbols):
                    t0 = time()
                    print(group, symbol)
                    rois = data_xarr.loc[start_date:end_date, symbol,
                                         "simple_roi"]
                    trans_dates = rois.get_index("trans_date")
                    # count() skips NaNs; the series itself is not filtered
                    n_roi = int(rois.count())
                    # zero the first observation of the interval
                    rois[0] = 0
                    cumulative_roi = float((1 + rois).prod() - 1)
                    # annualize over the calendar years of the interval
                    annual_roi = float(np.power(cumulative_roi + 1, 1.0 / (
                            end_date.year - start_date.year + 1)) - 1)
                    sharpe = risk_adj.Sharpe(rois)
                    sortino = risk_adj.Sortino_full(rois)[0]
                    jb = spstats.jarque_bera(rois)[1]

                    # worse case of adf
                    adf_c = tsa_tools.adfuller(rois, regression="c")[1]
                    adf_ct = tsa_tools.adfuller(rois, regression="ct")[1]
                    adf_ctt = tsa_tools.adfuller(rois, regression="ctt")[1]
                    adf_nc = tsa_tools.adfuller(rois, regression="nc")[1]
                    adf = max(adf_c, adf_ct, adf_ctt, adf_nc)

                    # worse case of SPA over 10 bootstrap runs
                    spa_value = 0
                    for _ in range(10):
                        spa = arch_comp.SPA(rois.data, np.zeros(rois.size),
                                            reps=1000)
                        spa.seed(np.random.randint(0, 2 ** 31 - 1))
                        spa.compute()
                        # preserve the worse p_value
                        if spa.pvalues[1] > spa_value:
                            spa_value = spa.pvalues[1]

                    writer.writerow(
                        {
                            "rank": sdx + 1,
                            "group": group,
                            "symbol": symbol,
                            "start_date": trans_dates[0].strftime(
                                "%Y-%m-%d"),
                            "end_date": trans_dates[-1].strftime(
                                "%Y-%m-%d"),
                            "n_data": n_roi,
                            "cum_roi": cumulative_roi,
                            "annual_roi": annual_roi,
                            "roi_mu": float(rois.mean()),
                            "std": float(rois.std(ddof=1)),
                            "skew": spstats.skew(rois, bias=False),
                            "ex_kurt": spstats.kurtosis(rois, bias=False),
                            "Sharpe": sharpe,
                            "Sortino": sortino,
                            "SPA_c": spa_value,
                            "JB": jb,
                            "worst_ADF": adf,
                        }
                    )
                    print(
                        "[{}] {} {}, cum_roi:{:.2%} {:.4f}".format(
                            sdx + 1, group, symbol, cumulative_roi,
                            time()-t0
                        )
                    )
    else:
        raise ValueError("unknown exp_name:{}".format(exp_name))