def main_global_setup(config, filter_pairs=None):
    global pi
    global state_encoding_model
    global env
    global optimizer
    global root
    global plot_folder_path
    global checkpoint_dir
    global _logger

    plot_folder_path = './model/logging/{}/plots/'.format(job_name)
    checkpoint_dir = './model/logging/{}/saved_models/'.format(job_name)
    log_folder_path = './model/logging/{}/'.format(job_name)

    os.makedirs(plot_folder_path, exist_ok=True)
    os.makedirs(checkpoint_dir, exist_ok=True)

    LogHelper.setup(log_path=log_folder_path + 'log.txt',
                    log_level=logging.INFO)
    _logger = logging.getLogger(__name__)

    _logger.info("Hello World!")
    _logger.info("{}".format(config))

    _logger.info("config.train_indices = {}".format(config.train_indices))
    _logger.info("config.test_indices = {}".format(config.test_indices))
    _logger.info("num_of_batch = {}".format(num_of_batch))

    # load data
    if config.load_which_data == "tech":
        all_pairs_slices, all_pairs_df, trading_period = rl_load_data.load_data(
            filter_pairs=filter_pairs)
    elif config.load_which_data == "energy":
        all_pairs_slices, all_pairs_df, trading_period = rl_load_data.load_data(
            dataset_folder_path='./model/dataset/nyse-daily-energy-transformed',
            raw_files_path_pattern="./model/dataset/nyse-daily-energy-trimmed-same-length/*.csv",
            filter_pairs=filter_pairs)
    elif config.load_which_data == "other":
        all_pairs_slices, all_pairs_df, trading_period = rl_load_data.load_data(
            dataset_folder_path='./model/dataset/other-assets-transformed',
            raw_files_path_pattern="./model/dataset/other-assets-trimmed-same-length/*.csv",
            filter_pairs=filter_pairs)

    # create objects
    pi = TradingPolicyModel()
    state_encoding_model = StateEncodingModel(batch_size, num_rnn_layers)
    env = trading_env.TradingEnvironment(state_encoding_model,
                                         all_pairs_slices, all_pairs_df,
                                         trading_period, batch_size,
                                         rl_load_data.col_name_to_ind)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
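    # Note: tf.train.AdamOptimizer is the TF 1.x optimizer API; the TF 2.x
    # equivalent would be tf.keras.optimizers.Adam(learning_rate=lr).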

    # create checkpoint object
    root = tf.train.Checkpoint(pi=pi,
                               state_encoding_model=state_encoding_model,
                               optimizer=optimizer)
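
    # Sketch (not in the original): assuming checkpoints are saved elsewhere
    # with root.save(...), the most recent one can be restored into the same
    # object graph, e.g.
    #   latest = tf.train.latest_checkpoint(checkpoint_dir)
    #   if latest is not None:
    #       root.restore(latest)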

# Example 2

def main():
    ##################################################################################################
    # Setup logger and output dir                                                                    #
    ##################################################################################################
    output_dir = 'output/grid-search-{}'.format(
        datetime.now(
            timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Setup logger
    LogHelper.setup(log_path='{}/backtesting.log'.format(output_dir),
                    log_level=logging.INFO)
    _logger = logging.getLogger(__name__)

    # Log all parameters
    _logger.info("Grid search parameters: {}".format(vars(config)))

    # get relevant stock data
    start_date_dt = datetime.strptime(config.start_date, "%Y-%m-%d").date()
    end_date_dt = datetime.strptime(config.end_date, "%Y-%m-%d").date()
    data = trim_raw_data_files(start_date=start_date_dt,
                               end_date=end_date_dt,
                               raw_folder="../data/nyse-daily-tech/",
                               result_folder="../tmp-data/")

    for stk in data:
        data[stk] = data[stk].reset_index()

    # get aggregated open and close prices
    close_df = GSTools.get_aggregated_with_dates(data,
                                                 col='close').set_index("date")
    open_df = GSTools.get_aggregated_with_dates(data,
                                                col='open').set_index("date")
    close_df_no_nan = close_df.dropna(axis='columns')

    _logger.info("Length of close_df before dropping NaN columns: {}".format(
        len(close_df)))
    _logger.info("Length of close_df after dropping NaN columns: {}".format(
        len(close_df_no_nan)))

    close_df = close_df_no_nan

    ##################################################################################################
    # perform pair selection                                                                         #
    ##################################################################################################
    ps_start_dt = config.pair_selection_start_date
    ps_end_dt = config.pair_selection_end_date
    ps_df = close_df.loc[ps_start_dt:ps_end_dt].copy()
    good_pairs = None
    param_combinations = None

    # total number of stocks remaining
    N = len(data.keys())

    # number of pairs of interest
    K = int(config.pct * N * (N - 1) / 2)
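    # e.g. with a hypothetical N = 100 stocks and config.pct = 0.01, this keeps
    # int(0.01 * 100 * 99 / 2) = 49 of the 4950 possible unordered pairs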

    if config.strategy_type == "distance":
        _logger.info("Choosing the best {} pairs out of {}.".format(K, N))
        good_pairs = select_pairs_for_all_combin(train_df=ps_df,
                                                 test_df=None,
                                                 config={
                                                     'n':
                                                     K,
                                                     'score_function':
                                                     distance_score,
                                                     'series_transform':
                                                     distance_transform
                                                 },
                                                 plot=False)

    elif config.strategy_type in ("cointegration", "kalman"):
        tmp_df = ps_df.copy().reset_index(drop=True)
        good_pairs = coint(df=tmp_df, intercept=True, sig_level=0.005)
        # sort by the third tuple element (presumably the cointegration p-value)
        good_pairs.sort(key=lambda x: x[2])
        K = int(config.pct * len(good_pairs))
        _logger.info("Choosing the best {} pairs out of {}.".format(
            K, len(good_pairs)))
        good_pairs = good_pairs[0:K]

    # log all selected pairs
    _logger.info("The selected pairs are: {}".format(good_pairs))

    ##################################################################################################
    # generate parameter space                                                                       #
    ##################################################################################################
    if config.strategy_type == "distance" or config.strategy_type == "cointegration":
        param_combinations = list(
            itertools.product(config.lookback_values, config.enter_thresholds,
                              config.exit_thresholds, config.loss_limits))
        param_combinations = [
            dict(zip(["lookback", "enter_threshold", "exit_threshold", "loss_limit"],
                     values)) for values in param_combinations
        ]
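        # e.g. hypothetical values lookback_values=[20, 30], enter_thresholds=[2.0]
        # yield dicts like {"lookback": 20, "enter_threshold": 2.0, ...}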
    elif config.strategy_type == "kalman":
        param_combinations = list(
            itertools.product(config.enter_thresholds, config.exit_thresholds,
                              config.loss_limits))
        param_combinations = [
            dict(zip(["enter_threshold", "exit_threshold", "loss_limit"], values))
            for values in param_combinations
        ]

    ##################################################################################################
    # calculate max_lookback                                                                         #
    ##################################################################################################
    MAX_LOOKBACK = 0
    if config.strategy_type == "distance" or config.strategy_type == "cointegration":
        MAX_LOOKBACK = max(config.lookback_values)
    elif config.strategy_type == "kalman":
        MAX_LOOKBACK = config.kalman_estimation_length
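    # MAX_LOOKBACK rows of pre-backtest history are prepended below so each
    # strategy has enough data to warm up its indicators from the first bar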

    ##################################################################################################
    # perform grid search                                                                            #
    ##################################################################################################
    # list to store MACRO results
    macro_results = []

    for i, params in enumerate(param_combinations, 1):
        _logger.info("Running parameter combination " + str(i) + "/" +
                     str(len(param_combinations)))
        _logger.info(
            "Backtesting all pairs using parameters: {}".format(params))

        # list to store MICRO results
        results = []

        stock_data_close = pd.concat([
            close_df.loc[config.start_date:config.backtest_start].tail(MAX_LOOKBACK),
            close_df.loc[config.backtest_start:config.backtest_end]
        ])

        stock_data_open = pd.concat([
            open_df.loc[config.start_date:config.backtest_start].tail(MAX_LOOKBACK),
            open_df.loc[config.backtest_start:config.backtest_end]
        ])

        for j, pair in enumerate(good_pairs, 1):
            # get the names of both stocks
            _logger.info("Running pair {}/{}".format(j, len(good_pairs)))
            stk0, stk1 = None, None

            if config.strategy_type == "kalman" or config.strategy_type == "cointegration":
                stk0, stk1, _ = pair
            else:
                stk0, stk1 = pair

            # get price data for both stocks
            stk0_df_test = pd.DataFrame({
                'datetime': stock_data_close[stk0].index,
                'close': stock_data_close[stk0].values.astype(float),
                'open': stock_data_open[stk0].values.astype(float)
            })
            stk1_df_test = pd.DataFrame({
                'datetime': stock_data_close[stk1].index,
                'close': stock_data_close[stk1].values.astype(float),
                'open': stock_data_open[stk1].values.astype(float)
            })
            # enforce the column order expected by the PandasData feeds below
            stk0_df_test = stk0_df_test[['datetime', 'close', 'open']]
            stk1_df_test = stk1_df_test[['datetime', 'close', 'open']]

            # Create a cerebro
            cerebro = bt.Cerebro()

            # Create data feeds; datetime/close/open give the column positions
            # in the frames built above
            data0 = bt.feeds.PandasData(dataname=stk0_df_test,
                                        timeframe=bt.TimeFrame.Days,
                                        datetime=0,
                                        close=1,
                                        open=2)
            data1 = bt.feeds.PandasData(dataname=stk1_df_test,
                                        timeframe=bt.TimeFrame.Days,
                                        datetime=0,
                                        close=1,
                                        open=2)

            # add data feeds to cerebro
            cerebro.adddata(data0)
            cerebro.adddata(data1)

            # Add the strategy
            if config.strategy_type == "distance":
                cerebro.addstrategy(
                    DistStrategy,
                    lookback=params["lookback"],
                    max_lookback=MAX_LOOKBACK,
                    enter_threshold_size=params["enter_threshold"],
                    exit_threshold_size=params["exit_threshold"],
                    loss_limit=params["loss_limit"],
                    consider_borrow_cost=True,
                    consider_commission=False,
                    print_msg=False)
            elif config.strategy_type == "cointegration":
                cerebro.addstrategy(
                    CointStrategy,
                    lookback=params["lookback"],
                    max_lookback=MAX_LOOKBACK,
                    enter_threshold_size=params["enter_threshold"],
                    exit_threshold_size=params["exit_threshold"],
                    loss_limit=params["loss_limit"],
                    consider_borrow_cost=True,
                    consider_commission=False)
            elif config.strategy_type == "kalman":
                cerebro.addstrategy(
                    CointKalmanStrategy,
                    max_lookback=MAX_LOOKBACK,
                    enter_threshold_size=params["enter_threshold"],
                    exit_threshold_size=params["exit_threshold"],
                    loss_limit=params["loss_limit"],
                    consider_borrow_cost=True,
                    consider_commission=False)

            # Add analyzers
            cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
            cerebro.addanalyzer(Metrics,
                                lookback=MAX_LOOKBACK,
                                _name='metrics')

            # Set the starting cash
            cerebro.broker.setcash(1000000)

            # And run it
            strat = cerebro.run()

            # get MICRO metrics
            results_dict = {}
            results_dict["pair"] = stk0 + "-" + stk1
            results_dict["sharperatio"] = strat[
                0].analyzers.mysharpe.get_analysis()['sharperatio']
            results_dict["returnstd"] = strat[0].analyzers.metrics.returns_std(
            )
            results_dict["startcash"] = cerebro.getbroker().startingcash
            results_dict["endcash"] = cerebro.getbroker().getvalue()
            results_dict["profit"] = (
                results_dict["endcash"] -
                results_dict["startcash"]) / results_dict["startcash"]

            results.append(results_dict)
            _logger.info("Performance of this pair: {}".format(results_dict))

        # convert to dataframe
        results_df = pd.DataFrame(results)

        # save as csv
        uuid_str = str(uuid.uuid4())
        path = "{}/{}.csv".format(output_dir, uuid_str)
        results_df.to_csv(path_or_buf=path, index=False)

        # calculate MACRO attributes
        avg_sharpe_ratio = results_df['sharperatio'].mean()
        median_sharpe_ratio = results_df['sharperatio'].median()

        avg_overall_return = results_df['profit'].mean()
        median_overall_return = results_df['profit'].median()
        overall_return_std = results_df['profit'].std()

        if config.strategy_type == "distance" or config.strategy_type == "cointegration":
            tup = (params["lookback"], params["enter_threshold"],
                   params["exit_threshold"], params["loss_limit"],
                   avg_sharpe_ratio, median_sharpe_ratio, avg_overall_return,
                   median_overall_return, overall_return_std, uuid_str)

        elif config.strategy_type == "kalman":
            tup = (params["enter_threshold"], params["exit_threshold"],
                   params["loss_limit"], avg_sharpe_ratio, median_sharpe_ratio,
                   avg_overall_return, median_overall_return,
                   overall_return_std, uuid_str)

        macro_results.append(tup)
        _logger.info("Performance of this set of parameters: {}".format(tup))

    macro_results_df = pd.DataFrame(macro_results)
    if config.strategy_type == "distance" or config.strategy_type == "cointegration":
        macro_results_df.columns = [
            'lookback', 'enter_threshold_size', 'exit_threshold_size',
            'loss_limit', 'avg_sharpe_ratio', 'median_sharpe_ratio',
            'avg_overall_return', 'median_overall_return',
            'overall_return_std', 'uuid'
        ]
    elif config.strategy_type == "kalman":
        macro_results_df.columns = [
            'enter_threshold_size', 'exit_threshold_size', 'loss_limit',
            'avg_sharpe_ratio', 'median_sharpe_ratio', 'avg_overall_return',
            'median_overall_return', 'overall_return_std', 'uuid'
        ]

    macro_results_df.to_csv(output_dir + '/' + 'summary.csv', index=False)
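
    # Sketch (not in the original script): the best parameter set could then be
    # recovered from the summary, e.g.
    #   best = pd.read_csv('{}/summary.csv'.format(output_dir)).sort_values(
    #       'avg_sharpe_ratio', ascending=False).iloc[0]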

# Example 3

def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(
        datetime.now(
            timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger(__name__)

    # Save the configuration for logging purpose
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type, args.x_dim)
    _logger.info('Finished generating dataset')

    model = GAE(args.n, args.d, args.x_dim, args.seed, args.num_encoder_layers,
                args.num_decoder_layers, args.hidden_size, args.latent_dim,
                args.l1_graph_penalty, args.use_float64)
    model.print_summary(print_func=model.logger.info)

    trainer = ALTrainer(args.init_rho, args.rho_thres, args.h_thres,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol, args.early_stopping,
                        args.early_stopping_thres)
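    # Judging by the parameter names, ALTrainer runs an augmented Lagrangian
    # scheme: rho penalizes the acyclicity violation and is multiplied by
    # rho_multiply until the constraint value falls below h_tol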
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, output_dir)
    _logger.info('Finished training model')

    # Save raw recovered graph, ground truth and observational data after training
    np.save('{}/true_graph.npy'.format(output_dir), dataset.W)
    np.save('{}/observational_data.npy'.format(output_dir), dataset.X)
    np.save('{}/final_raw_recovered_graph.npy'.format(output_dir), W_est)

    # Plot raw recovered graph
    plot_recovered_graph(
        W_est,
        dataset.W,
        save_name='{}/raw_recovered_graph.png'.format(output_dir))

    _logger.info('Filter by constant threshold')
    W_est = W_est / np.max(np.abs(W_est))  # Normalize
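    # after this step max(abs(W_est)) == 1, so args.graph_thres below acts as a
    # cutoff relative to the largest recovered edge weight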

    # Plot thresholded recovered graph
    W_est[np.abs(W_est) < args.graph_thres] = 0  # Thresholding
    plot_recovered_graph(
        W_est,
        dataset.W,
        save_name='{}/thresholded_recovered_graph.png'.format(output_dir))
    results_thresholded = count_accuracy(dataset.W, W_est)
    _logger.info('Results after thresholding by {}: {}'.format(
        args.graph_thres, results_thresholded))


def main():
    ##################################################################################################
    # Setup logger and output dir                                                                    #
    ##################################################################################################
    output_dir = config.output_dir

    if output_dir is None:
        output_dir = './jupyter_py/output/backtest-{}'.format(get_current_time())
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Setup logger
    LogHelper.setup(log_path='{}/backtesting.log'.format(output_dir),
                    log_level=logging.INFO)
    _logger = logging.getLogger(__name__)

    # Log all parameters
    _logger.info("Backtest parameters: {}".format(vars(config)))

    # load data
    data = GSTools.load_csv_files(config.data_path)
    stk0, stk1 = config.stk0, config.stk1

    # check existence of stocks
    if stk0 not in data:
        _logger.error("Stock symbol {} does not exist!".format(stk0))
        return
    if stk1 not in data:
        _logger.error("Stock symbol {} does not exist!".format(stk1))
        return

    # size requirements
    pre_backtest_size = None
    if config.strategy_type == "cointegration" or config.strategy_type == "distance":
        pre_backtest_size = config.lookback
    elif config.strategy_type == "kalman":
        pre_backtest_size = config.kalman_estimation_length
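    # pre_backtest_size rows before backtest_start are kept below as a warm-up
    # window so the strategy has enough history when trading begins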

    # select segment of data that we want
    data0, data1 = data[stk0].set_index("date"), data[stk1].set_index("date")
    start_date_dt = datetime.strptime(config.backtest_start, "%Y-%m-%d").date()
    end_date_dt = datetime.strptime(config.backtest_end, "%Y-%m-%d").date()

    data0 = pd.concat([data0[:start_date_dt].tail(pre_backtest_size),
                       data0[start_date_dt:end_date_dt]])
    data1 = pd.concat([data1[:start_date_dt].tail(pre_backtest_size),
                       data1[start_date_dt:end_date_dt]])
    data0 = data0.reset_index()
    data1 = data1.reset_index()

    # initialize cerebro
    cerebro = bt.Cerebro()

    # Create data feeds; open=1 / close=4 are column positions, which assumes
    # the usual date,open,high,low,close column layout of the raw CSVs
    data0 = bt.feeds.PandasData(dataname=data0,
                                timeframe=bt.TimeFrame.Days,
                                datetime=0,
                                open=1,
                                close=4)
    data1 = bt.feeds.PandasData(dataname=data1,
                                timeframe=bt.TimeFrame.Days,
                                datetime=0,
                                open=1,
                                close=4)

    # add data feeds to cerebro
    cerebro.adddata(data0)
    cerebro.adddata(data1)

    # Add the strategy
    if config.strategy_type == "distance":
        cerebro.addstrategy(DistStrategy,
                            stk0_symbol=stk0,
                            stk1_symbol=stk1,
                            lookback=config.lookback,
                            max_lookback=pre_backtest_size,
                            enter_threshold_size=config.enter_threshold,
                            exit_threshold_size=config.exit_threshold,
                            loss_limit=config.loss_limit,
                            consider_borrow_cost=True,
                            consider_commission=False,
                            print_msg=True,
                            print_transaction=True)
    elif config.strategy_type == "cointegration":
        cerebro.addstrategy(CointStrategy,
                            stk0_symbol=stk0,
                            stk1_symbol=stk1,
                            lookback=config.lookback,
                            max_lookback=pre_backtest_size,
                            enter_threshold_size=config.enter_threshold,
                            exit_threshold_size=config.exit_threshold,
                            loss_limit=config.loss_limit,
                            consider_borrow_cost=True,
                            consider_commission=True,
                            print_msg=True,
                            print_transaction=True)
    elif config.strategy_type == "kalman":
        cerebro.addstrategy(CointKalmanStrategy,
                            stk0_symbol=stk0,
                            stk1_symbol=stk1,
                            max_lookback=pre_backtest_size,
                            enter_threshold_size=config.enter_threshold,
                            exit_threshold_size=config.exit_threshold,
                            loss_limit=config.loss_limit,
                            consider_borrow_cost=True,
                            consider_commission=True,
                            print_msg=True,
                            print_transaction=True)
    # Add analyzers
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
    cerebro.addanalyzer(Metrics, lookback=pre_backtest_size, _name='metrics')

    # Set the starting cash
    cerebro.broker.setcash(1000000)

    # And run it
    strat = cerebro.run()

    # get MICRO metrics
    results_dict = {}
    results_dict["pair"] = stk0 + "-" + stk1
    results_dict["sharperatio"] = strat[0].analyzers.mysharpe.get_analysis(
    )['sharperatio']
    results_dict["returnstd"] = strat[0].analyzers.metrics.returns_std()
    results_dict["avg_holding_period"] = strat[
        0].analyzers.metrics.avg_holding_period
    results_dict["n_trades"] = strat[0].analyzers.metrics.n_trades
    results_dict["startcash"] = cerebro.getbroker().startingcash
    results_dict["endcash"] = cerebro.getbroker().getvalue()
    results_dict["profit"] = (
        results_dict["endcash"] -
        results_dict["startcash"]) / results_dict["startcash"]
    _logger.info("[pair-performance]: {}".format(results_dict))