results = xr.align(*results, join='outer') results = [r.fillna(0) for r in results] output = sum(results) / len(results) return output config = json.load(open('config.json', 'r')) # single-pass data = qndata.futures_load_data(min_date='2005-01-01') output = bag_strategy(data, config) output = qnout.clean(output, data) stats = qnstats.calc_stat(data, output.sel(time=slice('2006-01-01', None))) print(stats.to_pandas().tail()) # qngraph.make_major_plots(stats) # works in jupyter qnout.check(output, data) qnout.write(output) # # multi-pass # # It may look slow, but it is ok. The evaluator will run only one iteration per day. # qnbk.backtest( # competition_type='futures', # lookback_period=365, # strategy=lambda d: bag_strategy(d, config), # # strategy=strategy_long, # start_date='2006-01-01' # )
def backtest(
    *,
    competition_type: str,
    strategy: tp.Union[
        tp.Callable[[DataSet], xr.DataArray],
        tp.Callable[[DataSet, tp.Any], tp.Tuple[xr.DataArray, tp.Any]],
    ],
    load_data: tp.Union[
        tp.Callable[[int], tp.Union[DataSet, tp.Tuple[DataSet, np.ndarray]]],
        None,
    ] = None,
    lookback_period: int = 365,
    test_period: int = 365 * 15,
    start_date: tp.Union[np.datetime64, str, datetime.datetime, datetime.date, None] = None,
    window: tp.Union[tp.Callable[[DataSet, np.datetime64, int], DataSet], None] = None,
    step: int = 1,
    analyze: bool = True,
    build_plots: bool = True,
    collect_all_states: bool = False,
):
    """
    Run a multi-pass backtest of ``strategy`` and write the resulting output.

    The function works in three stages:

    1. "last pass": runs the strategy once on the most recent data, cleans and
       writes the output and the state. When ``is_submitted()`` is true the
       function returns right after this stage.
    2. "first pass": runs the strategy once with the data limited to
       ``start_date`` (the result is discarded, it only surfaces errors early).
    3. full run: loads ``test_period + lookback_period`` days and delegates to
       ``run_iterations``; the output is cleaned, written and optionally analyzed.

    :param competition_type: "futures" | "stocks" | "cryptofutures" | "stocks_long" | "crypto"
    :param strategy: accepts data (and optionally a state as the 2nd argument),
        returns weights distribution for the last day (or a result which
        ``unpack_result`` splits into weights and state)
    :param load_data: data load function, accepts tail arg, returns data or
        a tuple of data and time series; by default
        ``qndata.load_data_by_type(competition_type, tail=tail)``
    :param lookback_period: calendar days period for one iteration
    :param test_period: test period (calendar days)
    :param start_date: start date for backtesting, overrides test period
    :param window: function which isolates data for one iteration,
        ``standard_window`` by default
    :param step: step size
    :param analyze: analyze the output and calc stats
    :param build_plots: build plots (require analyze=True)
    :param collect_all_states: collect all states instead of the last one
    :return: ``result`` when the strategy takes fewer than 2 arguments,
        otherwise ``(result, state)``; ``None`` when the loaded time series is
        empty or ``run_iterations`` produces no result
    """
    qndc.track_event("BACKTEST")

    if window is None:
        window = standard_window

    if load_data is None:
        load_data = lambda tail: qndata.load_data_by_type(competition_type, tail=tail)

    # A strategy with 2+ positional arguments is stateful: (data, state) -> (output, state).
    args_count = len(inspect.getfullargspec(strategy).args)
    strategy_wrap = (lambda d, s: strategy(d)) if args_count < 2 else strategy

    # --- last pass ---
    log_info("Run last pass...")
    log_info("Load data...")
    data = load_data(lookback_period)

    # Best-effort sanity check: warn when the loaded data set does not match
    # the competition type. load_data may return a tuple (no ``name``), in
    # which case the check is silently skipped.
    compatible_types = {
        'stocks': ('stocks', 'stocks_long'),
        'cryptofutures': ('cryptofutures', 'crypto_futures'),
        'crypto': ('crypto',),
        'futures': ('futures',),
    }
    data_name = getattr(data, 'name', None)
    allowed_types = compatible_types.get(data_name) if isinstance(data_name, str) else None
    if allowed_types is not None and competition_type not in allowed_types:
        log_err("WARNING! The data type and the competition type are mismatch.")

    data, time_series = extract_time_series(data)

    log_info("Run strategy...")
    state = None
    if is_submitted() and args_count > 1:
        state = qnstate.read()
    result = strategy_wrap(data, state)
    result, state = unpack_result(result)

    log_info("Load data for cleanup...")
    data = qndata.load_data_by_type(
        competition_type, assets=result.asset.values.tolist(), tail=60
    )
    # Pass competition_type explicitly, exactly like the cleanup of the full
    # backtest below, so both outputs are cleaned by the same rules.
    result = qnout.clean(result, data, competition_type)
    result.name = competition_type
    log_info("Write result...")
    qnout.write(result)
    qnstate.write(state)

    if is_submitted():
        # Only the last pass is needed for a submitted strategy.
        if args_count > 1:
            return result, state
        return result

    log_info("---")

    # --- resolve the tested interval ---
    today = pd.Timestamp.today().to_datetime64()
    if start_date is None:
        start_date = today - np.timedelta64(test_period - 1, 'D')
    else:
        start_date = pd.Timestamp(start_date).to_datetime64()
        # load_data expects an integer number of days; round up so that the
        # whole interval since start_date is covered.
        test_period = int(np.ceil((today - start_date) / np.timedelta64(1, 'D')))

    # --- first pass: run once at start_date to fail fast ---
    log_info("Run first pass...")
    try:
        qndc.MAX_DATETIME_LIMIT = pd.Timestamp(start_date).to_pydatetime()
        qndc.MAX_DATE_LIMIT = qndc.MAX_DATETIME_LIMIT.date()
        log_info("Load data...")
        data = load_data(lookback_period)
        data, time_series = extract_time_series(data)
        log_info("Run strategy...")
        result = strategy_wrap(data, None)
        result, state = unpack_result(result)
    finally:
        # Always remove the date limit, even if the strategy failed.
        qndc.MAX_DATE_LIMIT = None
        qndc.MAX_DATETIME_LIMIT = None

    # --- full run ---
    log_info("---")
    log_info("Load full data...")
    data = load_data(test_period + lookback_period)
    data, time_series = extract_time_series(data)
    if len(time_series) < 1:
        log_err("Time series is empty")
        return

    log_info("---")
    result, state = run_iterations(
        time_series, data, window, start_date, lookback_period,
        strategy_wrap, step, collect_all_states,
    )
    if result is None:
        return

    log_info("Load data for cleanup and analysis...")
    min_date = time_series[0] - np.timedelta64(60, 'D')
    data = qndata.load_data_by_type(
        competition_type,
        assets=result.asset.values.tolist(),
        min_date=str(min_date)[:10],  # keep only the YYYY-MM-DD part
    )
    result = qnout.clean(result, data, competition_type)
    result.name = competition_type
    log_info("Write result...")
    qnout.write(result)
    qnstate.write(state)

    if analyze:
        log_info("---")
        analyze_results(result, data, competition_type, build_plots)

    if args_count > 1:
        return result, state
    return result
ma_slow = qnta.lwma(close, 50) ma_fast = qnta.lwma(close, 10) return xr.where(ma_fast > ma_slow, 1, -1) # SINGLE-PASS # --- # This is fast implementation, but it can easily become looking forward (common problem). # Use this approach for research and optimization. And use multi-pass to detect looking forward. data = qndata.cryptofutures.load_data(min_date="2013-04-01") # load data output = strategy(data) output = qnout.clean(output, data) # fix common errors qnout.check(output, data) # check that weights are correct: qnout.write(output) # write results, necessary for submission: stats = qnstats.calc_stat(data, output.sel(time=slice("2014-01-01", None))) # calc stats print(stats.to_pandas().tail()) # qngraph.make_major_plots(stats) # works in jupyter # --- # # # MULTI-PASS # # # --- # # Use this approach to make sure that your strategy is not looking forward. # weights = qnbt.backtest( # competition_type="cryptofutures", # BTC Futures contest # lookback_period=365, # lookback in calendar days # start_date="2014-01-01", # strategy=strategy,
def backtest_ml(
    *,
    train: tp.Callable[[DataSet], tp.Any],
    predict: tp.Union[
        tp.Callable[[tp.Any, DataSet], xr.DataArray],
        tp.Callable[[tp.Any, DataSet, tp.Any], tp.Tuple[xr.DataArray, tp.Any]],
    ],
    train_period: int = 4 * 365,
    retrain_interval: int = 365,
    predict_each_day: bool = False,
    retrain_interval_after_submit: tp.Union[int, None] = None,
    competition_type: str,
    load_data: tp.Union[
        tp.Callable[[int], tp.Union[DataSet, tp.Tuple[DataSet, np.ndarray]]],
        None,
    ] = None,
    lookback_period: int = 365,
    test_period: int = 365 * 15,
    start_date: tp.Union[np.datetime64, str, datetime.datetime, datetime.date, None] = None,
    end_date: tp.Union[np.datetime64, str, datetime.datetime, datetime.date, None] = None,
    window: tp.Union[tp.Callable[[DataSet, np.datetime64, int], DataSet], None] = None,
    analyze: bool = True,
    build_plots: bool = True,
    collect_all_states: bool = False,
):
    """
    Backtest a train/predict (machine learning) strategy with periodic retraining.

    The function first runs the last iteration (train if necessary, predict,
    clean and write the output for the last date) and returns right away when
    ``is_submitted()`` is true. Otherwise it runs one iteration at
    ``start_date`` to surface errors early and then walks through the test
    period, retraining the model every ``retrain_interval`` calendar days.

    :param train: creates and trains model for prediction
    :param predict: predicts price movements and generates outputs; accepts
        (model, data) or (model, data, state)
    :param train_period: the data length for training (passed as the tail
        argument to ``load_data`` and ``window``)
    :param retrain_interval: how often to retrain the model (in calendar days)
    :param predict_each_day: perform predict for every day.
        Set True if you suspect the looking forward
    :param retrain_interval_after_submit: retrain interval used instead of
        ``retrain_interval`` when ``is_submitted()`` is true; ``None`` means
        use ``retrain_interval``
    :param competition_type: "futures" | "stocks" | "cryptofutures" | "stocks_long" | "crypto" | "crypto_daily"
    :param load_data: data load function, accepts tail arg, returns data or
        a tuple of data and time series
    :param lookback_period: the minimal calendar days period for one prediction
    :param test_period: test period (calendar days)
    :param start_date: start date for backtesting, overrides test period
    :param end_date: end date for backtesting, by default - now
    :param window: function which isolates data for one prediction or training
    :param analyze: analyze the output and calc stats
    :param build_plots: build plots (require analyze=True)
    :param collect_all_states: collect all states instead of the last one
    :return: the output; ``(output, state)`` (or ``(output, states)`` with
        ``collect_all_states``) when the final state is not ``None``
    """
    qndc.track_event("ML_BACKTEST")

    if load_data is None:
        load_data = lambda tail: qndata.load_data_by_type(competition_type, tail=tail)

    if window is None:
        window = standard_window

    def copy_window(data, dt, tail):
        # Deep copy, so train/predict can not modify the shared data.
        return copy.deepcopy(window(data, dt, tail))

    # predict with 3+ positional arguments is stateful: (model, data, state).
    args_count = len(inspect.getfullargspec(predict).args)
    predict_wrap = (lambda m, d, s: predict(m, d)) if args_count < 3 else predict

    # ------------------------------------------------------------------
    # Last iteration
    # ------------------------------------------------------------------
    log_info("Run the last iteration...")

    data = load_data(max(train_period, lookback_period))
    data, data_ts = extract_time_series(data)
    last_ts = data_ts[-1]

    retrain_interval_cur = retrain_interval_after_submit if is_submitted() else retrain_interval
    if retrain_interval_cur is None:
        retrain_interval_cur = retrain_interval

    created = None
    model = None
    state = None
    if is_submitted() and (args_count > 2 or retrain_interval_cur > 1):
        # The stored state is a tuple: (model creation time, model, strategy state).
        state = qnstate.read()
        if state is not None:
            created, model, state = state[0], state[1], state[2]

    need_retrain = (
        model is None
        or retrain_interval_cur == 1
        or last_ts >= created + np.timedelta64(retrain_interval_cur, 'D')
    )
    if need_retrain:
        train_data_slice = copy_window(data, last_ts, train_period)
        model = train(train_data_slice)
        created = last_ts

    test_data_slice = copy_window(data, last_ts, lookback_period)
    output = predict_wrap(model, test_data_slice, state)
    output, state = unpack_result(output)

    if last_ts in output.time:
        result = output.sel(time=[last_ts])
        data = qndata.load_data_by_type(
            competition_type, assets=result.asset.values.tolist(), tail=60
        )
        result = qnout.clean(result, data, competition_type)
        result.name = competition_type
        qnout.write(result)
    else:
        # Nothing to write: `result` exists only when the last date was predicted.
        log_info("WARNING! The prediction does not contain the last date, the output is not written.")

    if need_retrain and retrain_interval_cur > 1 or state is not None:
        qnstate.write((created, model, state))

    if is_submitted():
        if state is not None:
            return output, ([state] if collect_all_states else state)
        return output

    # ------------------------------------------------------------------
    # Full backtest
    # ------------------------------------------------------------------
    try:
        log_info("---")
        qndc.set_max_datetime(end_date)

        last_date = np.datetime64(qndc.parse_date(datetime.date.today()))
        if start_date is None:
            start_date = last_date - np.timedelta64(test_period - 1, 'D')
        else:
            start_date = pd.Timestamp(start_date).to_datetime64()
            test_period = (last_date - start_date) // np.timedelta64(1, 'D')

        # --- first iteration at start_date, to catch most errors early ---
        log_info("Run First Iteration...")
        qndc.set_max_datetime(start_date)
        data = load_data(max(train_period, lookback_period))
        data, data_ts = extract_time_series(data)

        train_data_slice = copy_window(data, data_ts[-1], train_period)
        model = train(train_data_slice)

        test_data_slice = copy_window(data, data_ts[-1], lookback_period)
        # Start from an empty state, exactly like the main loop below does;
        # the state left by the last iteration belongs to the most recent date.
        output = predict_wrap(model, test_data_slice, None)
        output, state = unpack_result(output)

        # --- all iterations ---
        log_info("---")
        qndc.set_max_datetime(end_date)
        log_info("Run all iterations...")
        log_info('Load data...')

        train_data = load_data(test_period + train_period + lookback_period)
        train_data, train_ts = extract_time_series(train_data)

        test_data = load_data(test_period)
        test_ts = extract_time_series(test_data)[1]

        log_info('Backtest...')
        outputs = []
        states = []
        t = test_ts[0]
        state = None
        model = None

        with progressbar.ProgressBar(max_value=len(test_ts), poll_interval=1) as p:
            while True:
                # [t, end_t] is the interval served by the model trained at t;
                # end_t is snapped to the last available timestamp in it.
                end_t = t + np.timedelta64(max(retrain_interval - 1, 0), 'D')
                end_t = test_ts[test_ts <= end_t][-1]

                train_data_slice = copy_window(train_data, t, train_period)
                model = train(train_data_slice)

                if predict_each_day:
                    interval_ts = test_ts[np.logical_and(test_ts >= t, test_ts <= end_t)]
                    for test_t in interval_ts:
                        test_data_slice = copy_window(train_data, test_t, lookback_period)
                        output = predict_wrap(model, test_data_slice, state)
                        output, state = unpack_result(output)
                        if collect_all_states:
                            states.append(state)
                        if test_t in output.time:
                            # Keep only the prediction for the current day.
                            output = output.sel(time=[test_t])
                            outputs.append(output)
                        p.update(np.where(test_ts == test_t)[0].item())
                else:
                    # One predict call for the whole interval.
                    test_data_slice = copy_window(
                        train_data, end_t, lookback_period + retrain_interval
                    )
                    output = predict_wrap(model, test_data_slice, state)
                    output, state = unpack_result(output)
                    if collect_all_states:
                        states.append(state)
                    # Mask everything outside [t, end_t] and drop the dates
                    # which became entirely NaN. `how` is passed by keyword:
                    # xarray deprecated passing it positionally.
                    output = (
                        output.where(output.time >= t)
                        .where(output.time <= end_t)
                        .dropna('time', how='all')
                    )
                    outputs.append(output)
                    p.update(np.where(test_ts == end_t)[0].item())

                next_t = test_ts[test_ts > end_t]
                if len(next_t) == 0:
                    break
                t = next_t[0]

        result = xr.concat(outputs, dim='time')
        min_date = test_ts[0] - np.timedelta64(60, 'D')
        data = qndata.load_data_by_type(
            competition_type, min_date=str(min_date)[:10]  # YYYY-MM-DD part only
        )
        result = qnout.clean(result, data, competition_type)
        result.name = competition_type
        qnout.write(result)
        # t is the training date of the last model.
        qnstate.write((t, model, state))

        if analyze:
            log_info("---")
            analyze_results(result, data, competition_type, build_plots, start_date)

        if state is None:
            return result
        if collect_all_states:
            return result, states
        return result, state
    finally:
        # Always remove the date limit set above.
        qndc.set_max_datetime(None)