def main_global_setup(config, filter_pairs=None):
    global pi
    global state_encoding_model
    global env
    global optimizer
    global root
    global plot_folder_path
    global checkpoint_dir
    global _logger

    plot_folder_path = './model/logging/{}/plots/'.format(job_name)
    checkpoint_dir = './model/logging/{}/saved_models/'.format(job_name)
    log_folder_path = './model/logging/{}/'.format(job_name)
    os.makedirs(plot_folder_path, exist_ok=True)
    os.makedirs(checkpoint_dir, exist_ok=True)

    LogHelper.setup(log_path=log_folder_path + 'log.txt',
                    log_level=logging.INFO)
    _logger = logging.getLogger(__name__)
    _logger.info("Hello World!")
    _logger.info("{}".format(config))
    _logger.info("config.train_indices = {}".format(config.train_indices))
    _logger.info("config.test_indices = {}".format(config.test_indices))
    _logger.info("num_of_batch = {}".format(num_of_batch))

    # load data
    if config.load_which_data == "tech":
        all_pairs_slices, all_pairs_df, trading_period = rl_load_data.load_data(
            filter_pairs=filter_pairs)
    elif config.load_which_data == "energy":
        all_pairs_slices, all_pairs_df, trading_period = rl_load_data.load_data(
            dataset_folder_path='./model/dataset/nyse-daily-energy-transformed',
            raw_files_path_pattern="./model/dataset/nyse-daily-energy-trimmed-same-length/*.csv",
            filter_pairs=filter_pairs)
    elif config.load_which_data == "other":
        all_pairs_slices, all_pairs_df, trading_period = rl_load_data.load_data(
            dataset_folder_path='./model/dataset/other-assets-transformed',
            raw_files_path_pattern="./model/dataset/other-assets-trimmed-same-length/*.csv",
            filter_pairs=filter_pairs)

    # create objects
    pi = TradingPolicyModel()
    state_encoding_model = StateEncodingModel(batch_size, num_rnn_layers)
    env = trading_env.TradingEnvironment(state_encoding_model,
                                         all_pairs_slices, all_pairs_df,
                                         trading_period, batch_size,
                                         rl_load_data.col_name_to_ind)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    # create checkpoint object
    root = tf.train.Checkpoint(pi=pi,
                               state_encoding_model=state_encoding_model,
                               optimizer=optimizer)
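# Usage sketch (not part of the original file): main_global_setup() leaves the
# model, optimizer and the `root` checkpoint object in module globals. Below is
# a minimal sketch of how that checkpoint would typically be saved and restored
# with tf.train.Checkpoint; the 'ckpt' file prefix and the function name are
# assumptions for illustration, not taken from this codebase.
def checkpoint_roundtrip_sketch():
    """Illustrative only: save the `root` checkpoint, then restore the newest one."""
    checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
    save_path = root.save(checkpoint_prefix)  # writes ckpt-N index/data files
    # restore from the most recent checkpoint written to checkpoint_dir
    root.restore(tf.train.latest_checkpoint(checkpoint_dir))
    return save_path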
def main():
    ##################################################################################################
    #                              Setup logger and output dir                                       #
    ##################################################################################################
    output_dir = 'output/grid-search-{}'.format(
        datetime.now(
            timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    if not os.path.exists(output_dir):
        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Setup logger
    LogHelper.setup(log_path='{}/backtesting.log'.format(output_dir),
                    log_level=logging.INFO)
    _logger = logging.getLogger(__name__)

    # Log all parameters
    _logger.info("Grid search parameters: {}".format(vars(config)))

    # get relevant stock data
    start_date_dt = datetime.strptime(config.start_date, "%Y-%m-%d").date()
    end_date_dt = datetime.strptime(config.end_date, "%Y-%m-%d").date()
    data = trim_raw_data_files(start_date=start_date_dt,
                               end_date=end_date_dt,
                               raw_folder="../data/nyse-daily-tech/",
                               result_folder="../tmp-data/")
    for stk in data:
        data[stk] = data[stk].reset_index()

    # get aggregated open and close prices
    close_df = GSTools.get_aggregated_with_dates(data, col='close').set_index("date")
    open_df = GSTools.get_aggregated_with_dates(data, col='open').set_index("date")
    close_df_no_nan = close_df.dropna(axis='columns')
    _logger.info("Number of columns in close_df before dropping NaN columns: {}".format(
        close_df.shape[1]))
    _logger.info("Number of columns in close_df after dropping NaN columns: {}".format(
        close_df_no_nan.shape[1]))
    close_df = close_df_no_nan

    ##################################################################################################
    #                                perform pair selection                                          #
    ##################################################################################################
    ps_start_dt = config.pair_selection_start_date
    ps_end_dt = config.pair_selection_end_date
    ps_df = close_df.loc[ps_start_dt:ps_end_dt].copy()
    good_pairs = None
    param_combinations = None

    # total number of stocks remaining
    N = len(data.keys())
    # number of pairs of interest
    K = int(config.pct * N * (N - 1) / 2)

    if config.strategy_type == "distance":
        _logger.info("Choosing the best {} pairs out of {}.".format(K, N))
        good_pairs = select_pairs_for_all_combin(train_df=ps_df,
                                                 test_df=None,
                                                 config={
                                                     'n': K,
                                                     'score_function': distance_score,
                                                     'series_transform': distance_transform
                                                 },
                                                 plot=False)
    elif config.strategy_type == "cointegration" or config.strategy_type == "kalman":
        tmp_df = ps_df.copy()
        tmp_df = tmp_df.reset_index(drop=True)
        good_pairs = coint(df=tmp_df, intercept=True, sig_level=0.005)
        good_pairs.sort(key=lambda x: x[2])
        K = int(config.pct * len(good_pairs))
        _logger.info("Choosing the best {} pairs out of {}.".format(
            K, len(good_pairs)))
        good_pairs = good_pairs[0:K]

    # log all selected pairs
    _logger.info("The selected pairs are: {}".format(good_pairs))

    ##################################################################################################
    #                               generate parameter space                                         #
    ##################################################################################################
    if config.strategy_type == "distance" or config.strategy_type == "cointegration":
        param_combinations = list(
            itertools.product(config.lookback_values, config.enter_thresholds,
                              config.exit_thresholds, config.loss_limits))
        param_combinations = [
            dict(zip(["lookback", "enter_threshold", "exit_threshold", "loss_limit"],
                     values)) for values in param_combinations
        ]
    elif config.strategy_type == "kalman":
        param_combinations = list(
            itertools.product(config.enter_thresholds, config.exit_thresholds,
                              config.loss_limits))
        param_combinations = [
            dict(zip(["enter_threshold", "exit_threshold", "loss_limit"], values))
            for values in param_combinations
        ]

    ##################################################################################################
    #                                 calculate max_lookback                                         #
    ##################################################################################################
    MAX_LOOKBACK = 0
    if config.strategy_type == "distance" or config.strategy_type == "cointegration":
        MAX_LOOKBACK = max(config.lookback_values)
    elif config.strategy_type == "kalman":
        MAX_LOOKBACK = config.kalman_estimation_length

    ##################################################################################################
    #                                  perform grid search                                           #
    ##################################################################################################
    # list to store MACRO results
    macro_results = []

    for i, params in enumerate(param_combinations, 1):
        _logger.info("Running parameter combination " + str(i) + "/" +
                     str(len(param_combinations)))
        _logger.info("Backtesting all pairs using parameters: {}".format(params))

        # list to store MICRO results
        results = []

        # prepend MAX_LOOKBACK rows of history to the backtest window
        stock_data_close = close_df.loc[config.start_date:config.backtest_start].tail(MAX_LOOKBACK)
        stock_data_close = stock_data_close.append(
            close_df.loc[config.backtest_start:config.backtest_end])
        stock_data_open = open_df.loc[config.start_date:config.backtest_start].tail(MAX_LOOKBACK)
        stock_data_open = stock_data_open.append(
            open_df.loc[config.backtest_start:config.backtest_end])

        for j, pair in enumerate(good_pairs, 1):
            # get names of both stocks
            _logger.info("Running pair " + str(j) + "/" + str(len(good_pairs)))
            stk0, stk1 = None, None
            if config.strategy_type == "kalman" or config.strategy_type == "cointegration":
                stk0, stk1, _ = pair
            else:
                stk0, stk1 = pair

            # get data of both stocks
            stk0_df_test = pd.DataFrame({
                'datetime': stock_data_close[stk0].index,
                'close': stock_data_close[stk0].values.astype(float),
                'open': stock_data_open[stk0].values.astype(float)
            })
            stk1_df_test = pd.DataFrame({
                'datetime': stock_data_close[stk1].index,
                'close': stock_data_close[stk1].values.astype(float),
                'open': stock_data_open[stk1].values.astype(float)
            })
            stk0_df_test = stk0_df_test[['datetime', 'close', 'open']]
            stk1_df_test = stk1_df_test[['datetime', 'close', 'open']]

            # Create a cerebro instance
            cerebro = bt.Cerebro()

            # Create data feeds
            data0 = bt.feeds.PandasData(dataname=stk0_df_test,
                                        timeframe=bt.TimeFrame.Days,
                                        datetime=0,
                                        close=1,
                                        open=2)
            data1 = bt.feeds.PandasData(dataname=stk1_df_test,
                                        timeframe=bt.TimeFrame.Days,
                                        datetime=0,
                                        close=1,
                                        open=2)

            # add data feeds to cerebro
            cerebro.adddata(data0)
            cerebro.adddata(data1)

            # Add the strategy
            if config.strategy_type == "distance":
                cerebro.addstrategy(DistStrategy,
                                    lookback=params["lookback"],
                                    max_lookback=MAX_LOOKBACK,
                                    enter_threshold_size=params["enter_threshold"],
                                    exit_threshold_size=params["exit_threshold"],
                                    loss_limit=params["loss_limit"],
                                    consider_borrow_cost=True,
                                    consider_commission=False,
                                    print_msg=False)
            elif config.strategy_type == "cointegration":
                cerebro.addstrategy(CointStrategy,
                                    lookback=params["lookback"],
                                    max_lookback=MAX_LOOKBACK,
                                    enter_threshold_size=params["enter_threshold"],
                                    exit_threshold_size=params["exit_threshold"],
                                    loss_limit=params["loss_limit"],
                                    consider_borrow_cost=True,
                                    consider_commission=False)
            elif config.strategy_type == "kalman":
                cerebro.addstrategy(CointKalmanStrategy,
                                    max_lookback=MAX_LOOKBACK,
                                    enter_threshold_size=params["enter_threshold"],
                                    exit_threshold_size=params["exit_threshold"],
                                    loss_limit=params["loss_limit"],
                                    consider_borrow_cost=True,
                                    consider_commission=False)

            # Add analyzers
            cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
            cerebro.addanalyzer(Metrics, lookback=MAX_LOOKBACK, _name='metrics')

            # Set the starting cash
            cerebro.broker.setcash(1000000)

            # And run it
            strat = cerebro.run()

            # get MICRO metrics
            results_dict = {}
            results_dict["pair"] = stk0 + "-" + stk1
            results_dict["sharperatio"] = strat[0].analyzers.mysharpe.get_analysis()['sharperatio']
            results_dict["returnstd"] = strat[0].analyzers.metrics.returns_std()
            results_dict["startcash"] = cerebro.getbroker().startingcash
            results_dict["endcash"] = cerebro.getbroker().getvalue()
            results_dict["profit"] = (results_dict["endcash"] -
                                      results_dict["startcash"]) / results_dict["startcash"]
            results.append(results_dict)
            _logger.info("Performance of this pair: {}".format(results_dict))

        # convert to dataframe
        results_df = pd.DataFrame(results)

        # save as csv
        uuid_str = str(uuid.uuid4())
        path = output_dir + "/" + str(uuid_str) + ".csv"
        results_df.to_csv(path_or_buf=path, index=False)

        # calculate MACRO attributes
        avg_sharpe_ratio = results_df['sharperatio'].mean()
        median_sharpe_ratio = results_df['sharperatio'].median()
        avg_overall_return = results_df['profit'].mean()
        median_overall_return = results_df['profit'].median()
        overall_return_std = results_df['profit'].std()

        if config.strategy_type == "distance" or config.strategy_type == "cointegration":
            tup = (params["lookback"], params["enter_threshold"],
                   params["exit_threshold"], params["loss_limit"],
                   avg_sharpe_ratio, median_sharpe_ratio, avg_overall_return,
                   median_overall_return, overall_return_std, uuid_str)
        elif config.strategy_type == "kalman":
            tup = (params["enter_threshold"], params["exit_threshold"],
                   params["loss_limit"], avg_sharpe_ratio, median_sharpe_ratio,
                   avg_overall_return, median_overall_return,
                   overall_return_std, uuid_str)

        macro_results.append(tup)
        _logger.info("Performance of this set of parameters: {}".format(tup))

    macro_results_df = pd.DataFrame(macro_results)
    if config.strategy_type == "distance" or config.strategy_type == "cointegration":
        macro_results_df.columns = [
            'lookback', 'enter_threshold_size', 'exit_threshold_size',
            'loss_limit', 'avg_sharpe_ratio', 'median_sharpe_ratio',
            'avg_overall_return', 'median_overall_return',
            'overall_return_std', 'uuid'
        ]
    elif config.strategy_type == "kalman":
        macro_results_df.columns = [
            'enter_threshold_size', 'exit_threshold_size', 'loss_limit',
            'avg_sharpe_ratio', 'median_sharpe_ratio', 'avg_overall_return',
            'median_overall_return', 'overall_return_std', 'uuid'
        ]
    macro_results_df.to_csv(output_dir + '/' + 'summary.csv', index=False)
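# Follow-up sketch (assumption, not in the original script): summary.csv written
# above holds one row per parameter combination, so ranking it is a one-liner.
# The helper below sorts parameter sets best-first by a chosen column; the
# function name is hypothetical.
def rank_grid_search_summary(summary_path, by='median_sharpe_ratio'):
    """Load a grid-search summary.csv and sort parameter sets, best first."""
    summary = pd.read_csv(summary_path)
    return summary.sort_values(by, ascending=False).reset_index(drop=True)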
def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(
        datetime.now(
            timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger(__name__)

    # Save the configuration for logging purposes
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type, args.x_dim)
    _logger.info('Finished generating dataset')

    model = GAE(args.n, args.d, args.x_dim, args.seed, args.num_encoder_layers,
                args.num_decoder_layers, args.hidden_size, args.latent_dim,
                args.l1_graph_penalty, args.use_float64)
    model.print_summary(print_func=model.logger.info)

    trainer = ALTrainer(args.init_rho, args.rho_thres, args.h_thres,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol, args.early_stopping,
                        args.early_stopping_thres)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, output_dir)
    _logger.info('Finished training model')

    # Save raw recovered graph, ground truth and observational data after training
    np.save('{}/true_graph.npy'.format(output_dir), dataset.W)
    np.save('{}/observational_data.npy'.format(output_dir), dataset.X)
    np.save('{}/final_raw_recovered_graph.npy'.format(output_dir), W_est)

    # Plot raw recovered graph
    plot_recovered_graph(
        W_est, dataset.W,
        save_name='{}/raw_recovered_graph.png'.format(output_dir))

    _logger.info('Filter by constant threshold')
    W_est = W_est / np.max(np.abs(W_est))  # Normalize

    # Plot thresholded recovered graph
    W_est[np.abs(W_est) < args.graph_thres] = 0  # Thresholding
    plot_recovered_graph(
        W_est, dataset.W,
        save_name='{}/thresholded_recovered_graph.png'.format(output_dir))

    results_thresholded = count_accuracy(dataset.W, W_est)
    _logger.info('Results after thresholding by {}: {}'.format(
        args.graph_thres, results_thresholded))
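# Standalone sketch (assumption, not in the original script): the
# normalize-then-threshold post-processing applied to W_est above, factored
# into a reusable helper. The function name is hypothetical.
import numpy as np

def postprocess_graph(W, graph_thres):
    """Scale the recovered adjacency matrix by its largest absolute entry,
    then zero out edges whose magnitude falls below graph_thres."""
    W = W / np.max(np.abs(W))       # division returns a new array
    W[np.abs(W) < graph_thres] = 0  # hard threshold on edge weights
    return W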
def main():
    ##################################################################################################
    #                              Setup logger and output dir                                       #
    ##################################################################################################
    output_dir = config.output_dir
    if output_dir is None:
        output_dir = './jupyter_py/output/backtest-{}'.format(get_current_time())
    if not os.path.exists(output_dir):
        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Setup logger
    LogHelper.setup(log_path='{}/backtesting.log'.format(output_dir),
                    log_level=logging.INFO)
    _logger = logging.getLogger(__name__)

    # Log all parameters
    _logger.info("Backtest parameters: {}".format(vars(config)))

    # load data
    data = GSTools.load_csv_files(config.data_path)
    stk0, stk1 = config.stk0, config.stk1

    # check existence of stocks
    if stk0 not in data:
        _logger.error("Stock symbol {} does not exist!".format(stk0))
        return
    if stk1 not in data:
        _logger.error("Stock symbol {} does not exist!".format(stk1))
        return

    # size requirements
    pre_backtest_size = None
    if config.strategy_type == "cointegration" or config.strategy_type == "distance":
        pre_backtest_size = config.lookback
    elif config.strategy_type == "kalman":
        pre_backtest_size = config.kalman_estimation_length

    # select the segment of data that we want
    data0, data1 = data[stk0].set_index("date"), data[stk1].set_index("date")
    start_date_dt = datetime.strptime(config.backtest_start, "%Y-%m-%d").date()
    end_date_dt = datetime.strptime(config.backtest_end, "%Y-%m-%d").date()
    data0 = data0[:start_date_dt].tail(pre_backtest_size).append(
        data0[start_date_dt:end_date_dt])
    data1 = data1[:start_date_dt].tail(pre_backtest_size).append(
        data1[start_date_dt:end_date_dt])
    data0 = data0.reset_index()
    data1 = data1.reset_index()

    # initialize cerebro
    cerebro = bt.Cerebro()

    # Create data feeds
    data0 = bt.feeds.PandasData(dataname=data0,
                                timeframe=bt.TimeFrame.Days,
                                datetime=0,
                                open=1,
                                close=4)
    data1 = bt.feeds.PandasData(dataname=data1,
                                timeframe=bt.TimeFrame.Days,
                                datetime=0,
                                open=1,
                                close=4)

    # add data feeds to cerebro
    cerebro.adddata(data0)
    cerebro.adddata(data1)

    # Add the strategy
    if config.strategy_type == "distance":
        cerebro.addstrategy(DistStrategy,
                            stk0_symbol=stk0,
                            stk1_symbol=stk1,
                            lookback=config.lookback,
                            max_lookback=pre_backtest_size,
                            enter_threshold_size=config.enter_threshold,
                            exit_threshold_size=config.exit_threshold,
                            loss_limit=config.loss_limit,
                            consider_borrow_cost=True,
                            consider_commission=False,
                            print_msg=True,
                            print_transaction=True)
    elif config.strategy_type == "cointegration":
        cerebro.addstrategy(CointStrategy,
                            stk0_symbol=stk0,
                            stk1_symbol=stk1,
                            lookback=config.lookback,
                            max_lookback=pre_backtest_size,
                            enter_threshold_size=config.enter_threshold,
                            exit_threshold_size=config.exit_threshold,
                            loss_limit=config.loss_limit,
                            consider_borrow_cost=True,
                            consider_commission=True,
                            print_msg=True,
                            print_transaction=True)
    elif config.strategy_type == "kalman":
        cerebro.addstrategy(CointKalmanStrategy,
                            stk0_symbol=stk0,
                            stk1_symbol=stk1,
                            max_lookback=pre_backtest_size,
                            enter_threshold_size=config.enter_threshold,
                            exit_threshold_size=config.exit_threshold,
                            loss_limit=config.loss_limit,
                            consider_borrow_cost=True,
                            consider_commission=True,
                            print_msg=True,
                            print_transaction=True)

    # Add analyzers
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
    cerebro.addanalyzer(Metrics, lookback=pre_backtest_size, _name='metrics')

    # Set the starting cash
    cerebro.broker.setcash(1000000)

    # And run it
    strat = cerebro.run()

    # get MICRO metrics
    results_dict = {}
    results_dict["pair"] = stk0 + "-" + stk1
    results_dict["sharperatio"] = strat[0].analyzers.mysharpe.get_analysis()['sharperatio']
    results_dict["returnstd"] = strat[0].analyzers.metrics.returns_std()
    results_dict["avg_holding_period"] = strat[0].analyzers.metrics.avg_holding_period
    results_dict["n_trades"] = strat[0].analyzers.metrics.n_trades
    results_dict["startcash"] = cerebro.getbroker().startingcash
    results_dict["endcash"] = cerebro.getbroker().getvalue()
    results_dict["profit"] = (results_dict["endcash"] -
                              results_dict["startcash"]) / results_dict["startcash"]
    _logger.info("[pair-performance]: {}".format(results_dict))
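# Optional follow-up sketch (assumption, not in the original script): persisting
# the per-pair metrics next to backtesting.log makes separate runs easier to
# compare. The file name results.json and the helper name are arbitrary.
import json

def save_results_json(results_dict, output_dir):
    """Write the MICRO metrics dict to <output_dir>/results.json."""
    with open(os.path.join(output_dir, 'results.json'), 'w') as f:
        json.dump(results_dict, f, indent=2, default=str)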