    results = xr.align(*results, join='outer')
    results = [r.fillna(0) for r in results]
    output = sum(results) / len(results)
    return output
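
# A self-contained illustration of the align/fillna/average pattern used at the
# end of bag_strategy above: two toy weight arrays covering different assets are
# aligned on the union of their coordinates, missing entries are zero-filled,
# and the aligned arrays are averaged into an equal-weight blend. The asset
# names and dates here are made up for the illustration.
import xarray as xr

toy_a = xr.DataArray([[1.0]], dims=['time', 'asset'],
                     coords={'time': ['2020-01-06'], 'asset': ['F_ES']})
toy_b = xr.DataArray([[-1.0]], dims=['time', 'asset'],
                     coords={'time': ['2020-01-06'], 'asset': ['F_NQ']})
toy_results = xr.align(toy_a, toy_b, join='outer')   # tuple of aligned arrays
toy_results = [r.fillna(0) for r in toy_results]     # absent assets get weight 0
toy_blend = sum(toy_results) / len(toy_results)      # equal-weight average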


config = json.load(open('config.json', 'r'))

# single-pass
data = qndata.futures_load_data(min_date='2005-01-01')

output = bag_strategy(data, config)
output = qnout.clean(output, data)

stats = qnstats.calc_stat(data, output.sel(time=slice('2006-01-01', None)))
print(stats.to_pandas().tail())
# qngraph.make_major_plots(stats)  # works in jupyter

qnout.check(output, data)
qnout.write(output)

# # multi-pass
# # It may look slow, but that is OK: the evaluator runs only one iteration per day.
# qnbk.backtest(
#     competition_type='futures',
#     lookback_period=365,
#     strategy=lambda d: bag_strategy(d, config),
#     # strategy=strategy_long,
#     start_date='2006-01-01'
# )
Example #2
def backtest(*,
             competition_type: str,
             strategy: tp.Union[
                tp.Callable[[DataSet], xr.DataArray],
                tp.Callable[[DataSet, tp.Any], tp.Tuple[xr.DataArray, tp.Any]],
             ],
             load_data: tp.Union[tp.Callable[[int], tp.Union[DataSet,tp.Tuple[DataSet,np.ndarray]]],None] = None,
             lookback_period: int = 365,
             test_period: int = 365*15,
             start_date: tp.Union[np.datetime64, str, datetime.datetime, datetime.date, None] = None,
             window: tp.Union[tp.Callable[[DataSet,np.datetime64,int], DataSet], None] = None,
             step: int = 1,
             analyze: bool = True,
             build_plots: bool = True,
             collect_all_states: bool = False,
             ):
    """

    :param competition_type: "futures" | "stocks" | "cryptofutures" | "stocks_long" | "crypto"
    :param load_data: data load function, accepts tail arg, returns time series and data
    :param lookback_period: calendar days period for one iteration
    :param strategy: accepts data, returns weights distribution for the last day
    :param test_period: test period (calendar days)
    :param start_date: start date for backtesting, overrides test period
    :param step: step size
    :param window: function which isolates data for one iteration
    :param analyze: analyze the output and calc stats
    :param build_plots: build plots (requires analyze=True)
    :param collect_all_states: collect all states instead of the last one
    :return:
    """
    qndc.track_event("BACKTEST")

    if window is None:
        window = standard_window

    if load_data is None:
        load_data = lambda tail: qndata.load_data_by_type(competition_type, tail=tail)

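    # a strategy may accept (data) or (data, state); wrap the one-argument form
    # so both variants can be called uniformly below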
    args_count = len(inspect.getfullargspec(strategy).args)
    strategy_wrap = (lambda d, s: strategy(d)) if args_count < 2 else strategy

    log_info("Run last pass...")
    log_info("Load data...")
    data = load_data(lookback_period)
    try:
        if data.name == 'stocks' and competition_type != 'stocks' and competition_type != 'stocks_long'\
            or data.name == 'cryptofutures' and competition_type != 'cryptofutures' and competition_type != 'crypto_futures'\
            or data.name == 'crypto' and competition_type != 'crypto'\
            or data.name == 'futures' and competition_type != 'futures':
            log_err("WARNING! The data type and the competition type do not match.")
    except:
        pass
    data, time_series = extract_time_series(data)

    log_info("Run strategy...")
    state = None
    if is_submitted() and args_count > 1:
        state = qnstate.read()
    result = strategy_wrap(data, state)
    result, state = unpack_result(result)

    log_info("Load data for cleanup...")
    data = qndata.load_data_by_type(competition_type, assets=result.asset.values.tolist(), tail=60)
    result = qnout.clean(result, data)
    result.name = competition_type
    log_info("Write result...")
    qnout.write(result)
    qnstate.write(state)

    if is_submitted():
        if args_count > 1:
            return result, state
        else:
            return result

    log_info("---")

    if start_date is None:
        start_date = pd.Timestamp.today().to_datetime64() - np.timedelta64(test_period-1, 'D')
    else:
        start_date = pd.Timestamp(start_date).to_datetime64()
        test_period = (pd.Timestamp.today().to_datetime64() - start_date) / np.timedelta64(1, 'D')

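    # limit the data to start_date and run the strategy once to surface errors
    # before the full backtest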
    log_info("Run first pass...")
    try:
        qndc.MAX_DATETIME_LIMIT = pd.Timestamp(start_date).to_pydatetime()
        qndc.MAX_DATE_LIMIT = qndc.MAX_DATETIME_LIMIT.date()
        print("Load data...")
        data = load_data(lookback_period)
        data, time_series = extract_time_series(data)
        print("Run strategy...")
        result = strategy_wrap(data, None)
        result, state = unpack_result(result)
    finally:
        qndc.MAX_DATE_LIMIT = None
        qndc.MAX_DATETIME_LIMIT = None

    log_info("---")

    log_info("Load full data...")
    data = load_data(test_period + lookback_period)
    data, time_series = extract_time_series(data)
    if len(time_series) < 1:
        log_err("Time series is empty")
        return

    log_info("---")
    result, state = run_iterations(time_series, data, window, start_date, lookback_period, strategy_wrap, step, collect_all_states)
    if result is None:
        return

    log_info("Load data for cleanup and analysis...")
    min_date = time_series[0] - np.timedelta64(60, 'D')
    data = qndata.load_data_by_type(competition_type, assets=result.asset.values.tolist(), min_date=str(min_date)[:10])
    result = qnout.clean(result, data, competition_type)
    result.name = competition_type
    log_info("Write result...")
    qnout.write(result)
    qnstate.write(state)

    if analyze:
        log_info("---")
        analyze_results(result, data, competition_type, build_plots)

    if args_count > 1:
        return result, state
    else:
        return result
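
# A hedged usage sketch for the stateful form of backtest: a two-argument
# strategy receives the previous state and returns (weights, state), and
# backtest returns both when the strategy takes two arguments. The moving
# average rule and the state contents below are illustrative only; the import
# paths follow the qnt library's usual layout.
import xarray as xr
import qnt.ta as qnta
import qnt.backtester as qnbt


def stateful_strategy(data, state):
    close = data.sel(field='close')
    ma = qnta.lwma(close, 20)
    weights = xr.where(close > ma, 1, 0)             # long above the moving average
    new_state = {'last_time': data.time.values[-1]}  # any picklable object can serve as state
    return weights, new_state


weights, state = qnbt.backtest(
    competition_type='futures',
    lookback_period=365,
    start_date='2006-01-01',
    strategy=stateful_strategy,
)
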
Example #3
def strategy(data):
    close = data.sel(field="close")
    ma_slow = qnta.lwma(close, 50)
    ma_fast = qnta.lwma(close, 10)
    return xr.where(ma_fast > ma_slow, 1, -1)


# SINGLE-PASS
# ---
# This is a fast implementation, but it can easily end up looking forward (a common problem).
# Use this approach for research and optimization, and use the multi-pass approach to detect forward-looking.
data = qndata.cryptofutures.load_data(min_date="2013-04-01")  # load data

output = strategy(data)
output = qnout.clean(output, data) # fix common errors

qnout.check(output, data) # check that the weights are correct
qnout.write(output) # write results, necessary for submission

stats = qnstats.calc_stat(data, output.sel(time=slice("2014-01-01", None))) # calc stats
print(stats.to_pandas().tail())
# qngraph.make_major_plots(stats) # works in jupyter
# ---


# # # MULTI-PASS
# # # ---
# # Use this approach to make sure that your strategy is not looking forward.
# weights = qnbt.backtest(
#     competition_type="cryptofutures",  # BTC Futures contest
#     lookback_period=365,  # lookback in calendar days
#     start_date="2014-01-01",
#     strategy=strategy,
# )
Example #4
def backtest_ml(
    *,
    train: tp.Callable[[DataSet], tp.Any],
    predict: tp.Union[tp.Callable[[tp.Any, DataSet], xr.DataArray],
                      tp.Callable[[tp.Any, DataSet, tp.Any],
                                  tp.Tuple[xr.DataArray, tp.Any]], ],
    train_period: int = 4 * 365,
    retrain_interval: int = 365,
    predict_each_day: bool = False,
    retrain_interval_after_submit: tp.Union[int, None] = None,
    competition_type: str,
    load_data: tp.Union[tp.Callable[[int], tp.Union[DataSet,
                                                    tp.Tuple[DataSet,
                                                             np.ndarray]]],
                        None] = None,
    lookback_period: int = 365,
    test_period: int = 365 * 15,
    start_date: tp.Union[np.datetime64, str, datetime.datetime, datetime.date,
                         None] = None,
    end_date: tp.Union[np.datetime64, str, datetime.datetime, datetime.date,
                       None] = None,
    window: tp.Union[tp.Callable[[DataSet, np.datetime64, int], DataSet],
                     None] = None,
    analyze: bool = True,
    build_plots: bool = True,
    collect_all_states: bool = False,
):
    """

    :param train: creates and trains model for prediction
    :param predict: predicts price movements and generates outputs
    :param train_period: the data length for training (in calendar days)
    :param retrain_interval: how often to retrain the model (in calendar days)
    :param predict_each_day: run predict for every day; set to True if you suspect forward-looking
    :param retrain_interval_after_submit: retrain interval used after submission (in calendar days); defaults to retrain_interval
    :param competition_type: "futures" | "stocks" | "cryptofutures" | "stocks_long" | "crypto" | "crypto_daily"
    :param load_data: data load function, accepts tail arg, returns time series and data
    :param lookback_period: the minimal period in calendar days for one prediction
    :param test_period: test period (calendar days)
    :param start_date: start date for backtesting, overrides test period
    :param end_date: end date for backtesting, by default - now
    :param window: function which isolates data for one prediction or training
    :param analyze: analyze the output and calc stats
    :param build_plots: build plots (require analyze=True)
    :param collect_all_states: collect all states instead of the last one
    :return:
    """
    qndc.track_event("ML_BACKTEST")

    if load_data is None:
        load_data = lambda tail: qndata.load_data_by_type(competition_type,
                                                          tail=tail)

    if window is None:
        window = standard_window

    def copy_window(data, dt, tail):
        return copy.deepcopy(window(data, dt, tail))

    args_count = len(inspect.getfullargspec(predict).args)
    predict_wrap = (
        lambda m, d, s: predict(m, d)) if args_count < 3 else predict

    log_info("Run the last iteration...")

    data = load_data(max(train_period, lookback_period))
    data, data_ts = extract_time_series(data)

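    # when running as a submission, use retrain_interval_after_submit if it is
    # set; otherwise fall back to retrain_interval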
    retrain_interval_cur = (retrain_interval_after_submit
                            if is_submitted() else retrain_interval)
    if retrain_interval_cur is None:
        retrain_interval_cur = retrain_interval
    created = None
    model = None
    state = None
    if is_submitted() and (args_count > 2 or retrain_interval_cur > 1):
        state = qnstate.read()
        if state is not None:
            created = state[0]
            model = state[1]
            state = state[2]
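    # retrain when there is no saved model, when retraining happens every day,
    # or when the retrain interval has elapsed since the model was created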
    need_retrain = model is None or retrain_interval_cur == 1 \
                   or data_ts[-1] >= created + np.timedelta64(retrain_interval_cur, 'D')
    if need_retrain:
        train_data_slice = copy_window(data, data_ts[-1], train_period)
        model = train(train_data_slice)
        created = data_ts[-1]

    test_data_slice = copy_window(data, data_ts[-1], lookback_period)
    output = predict_wrap(model, test_data_slice, state)
    output, state = unpack_result(output)

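    # keep only the weights for the latest available day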
    if data_ts[-1] in output.time:
        result = output.sel(time=[data_ts[-1]])

    data = qndata.load_data_by_type(competition_type,
                                    assets=result.asset.values.tolist(),
                                    tail=60)
    result = qnout.clean(result, data, competition_type)

    result.name = competition_type
    qnout.write(result)

    if (need_retrain and retrain_interval_cur > 1) or state is not None:
        qnstate.write((created, model, state))

    if is_submitted():
        if state is not None:
            return output, [state] if collect_all_states else state
        else:
            return output

    try:
        print("---")
        qndc.set_max_datetime(end_date)

        last_date = np.datetime64(qndc.parse_date(datetime.date.today()))
        if start_date is None:
            start_date = last_date - np.timedelta64(test_period - 1, 'D')
        else:
            start_date = pd.Timestamp(start_date).to_datetime64()
            test_period = (last_date - start_date) // np.timedelta64(1, 'D')

        # ---
        log_info("Run First Iteration...")  # to catch most errors
        qndc.set_max_datetime(start_date)
        data = load_data(max(train_period, lookback_period))
        data, data_ts = extract_time_series(data)

        train_data_slice = copy_window(data, data_ts[-1], train_period)
        model = train(train_data_slice)

        test_data_slice = copy_window(data, data_ts[-1], lookback_period)
        output = predict_wrap(model, test_data_slice, state)
        output, state = unpack_result(output)

        # ---
        print("---")
        qndc.set_max_datetime(end_date)
        log_info("Run all iterations...")
        log_info('Load data...')

        train_data = load_data(test_period + train_period + lookback_period)
        train_data, train_ts = extract_time_series(train_data)

        test_data = load_data(test_period)
        test_ts = extract_time_series(test_data)[1]

        log_info('Backtest...')
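        # walk forward through the test period: train on the preceding
        # train_period days, predict up to the next retrain point, then advance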
        outputs = []
        t = test_ts[0]
        state = None
        model = None
        states = []
        with progressbar.ProgressBar(max_value=len(test_ts),
                                     poll_interval=1) as p:
            go = True
            while go:
                end_t = t + np.timedelta64(max(retrain_interval - 1, 0), 'D')
                end_t = test_ts[test_ts <= end_t][-1]

                train_data_slice = copy_window(train_data, t, train_period)
                # print("train model t <=", str(t)[:10])
                model = train(train_data_slice)
                # print("predict", str(t)[:10], "<= t <=", str(end_t)[:10])
                if predict_each_day:
                    for test_t in test_ts[np.logical_and(
                            test_ts >= t, test_ts <= end_t)]:
                        test_data_slice = copy_window(train_data, test_t,
                                                      lookback_period)
                        output = predict_wrap(model, test_data_slice, state)
                        output, state = unpack_result(output)
                        if collect_all_states:
                            states.append(state)
                        if test_t in output.time:
                            output = output.sel(time=[test_t])
                            outputs.append(output)
                            p.update(np.where(test_ts == test_t)[0].item())
                else:
                    test_data_slice = copy_window(
                        train_data, end_t, lookback_period + retrain_interval)
                    output = predict_wrap(model, test_data_slice, state)
                    output, state = unpack_result(output)
                    if collect_all_states:
                        states.append(state)
                    output = output.where(output.time >= t).where(
                        output.time <= end_t).dropna('time', 'all')
                    outputs.append(output)

                p.update(np.where(test_ts == end_t)[0].item())

                next_t = test_ts[test_ts > end_t]
                if len(next_t) > 0:
                    t = next_t[0]
                else:
                    go = False

            result = xr.concat(outputs, dim='time')
            min_date = test_ts[0] - np.timedelta64(60, 'D')
            data = qndata.load_data_by_type(competition_type,
                                            min_date=str(min_date)[:10])
            result = qnout.clean(result, data, competition_type)
            result.name = competition_type
            qnout.write(result)
            qnstate.write((t, model, state))
            if analyze:
                log_info("---")
                analyze_results(result, data, competition_type, build_plots,
                                start_date)
                if state is None:
                    return result
                elif collect_all_states:
                    return result, states
                else:
                    return result, state
    finally:
        qndc.set_max_datetime(None)
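

# A hedged usage sketch for backtest_ml: train(data) builds a "model" and
# predict(model, data) returns weights. The "model" below is a deliberately
# trivial stand-in (the per-asset mean daily return over the training window),
# meant only to show the expected call shapes; the import paths follow the
# qnt library's usual layout.
import xarray as xr
import qnt.backtester as qnbt


def train(data):
    close = data.sel(field='close')
    daily_returns = close / close.shift(time=1) - 1
    return daily_returns.mean('time')        # per-asset average return is the "model"


def predict(model, data):
    close = data.sel(field='close')
    return xr.zeros_like(close) + model      # broadcast the per-asset signal over time


weights = qnbt.backtest_ml(
    train=train,
    predict=predict,
    train_period=2 * 365,        # calendar days of history for each training run
    retrain_interval=365,        # retrain once a year
    competition_type='futures',
    lookback_period=365,
    start_date='2015-01-01',
)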