Example #1
0
def print_table_from_perf_array(perf, factor_returns=None, show_baseline=False, show_header=True):
    """Print and return a side-by-side table of performance statistics.

    Parameters
    ----------
    perf : iterable
        Items are indexable; ``item[0]`` is used as the column name and
        ``item[1]`` is passed to
        ``pf.utils.extract_rets_pos_txn_from_zipline``, whose first return
        element is taken as that backtest's returns.
    factor_returns : optional
        Forwarded to ``perf_stats`` as ``factor_returns``. When
        ``show_baseline`` is true it is also used as the returns of an
        additional leading column named ``'Baseline'``.
    show_baseline : bool
        Whether to prepend the ``'Baseline'`` column.
    show_header : bool
        Whether to pass start date, end date and approximate month count of
        the first backtest to ``print_table`` as header rows.

    Returns
    -------
    pandas.DataFrame
        One column per backtest (plus the optional baseline), one row per
        statistic. Statistics listed in ``STAT_FUNCS_PCT`` are rendered as
        percentage strings.
    """
    APPROX_BDAYS_PER_MONTH = 21

    # Statistics that are displayed as percentages rather than raw floats.
    STAT_FUNCS_PCT = [
        'Annual return',
        'Cumulative returns',
        'Annual volatility',
        'Max drawdown',
        'Daily value at risk',
        'Daily turnover'
    ]

    entries = [
        (item[0], pf.utils.extract_rets_pos_txn_from_zipline(item[1])[0])
        for item in perf
    ]
    arr = list(zip(*entries))
    names_arr = arr[0]
    returns_arr = arr[1]

    # Header rows are derived from the first backtest, which is taken as
    # representative of all the others.
    if show_header:
        first_returns = returns_arr[0]
        date_rows = OrderedDict()
        if len(first_returns.index) > 0:
            date_rows['Start date'] = first_returns.index[0].strftime('%Y-%m-%d')
            date_rows['End date'] = first_returns.index[-1].strftime('%Y-%m-%d')
            date_rows['Total months'] = int(len(first_returns) / APPROX_BDAYS_PER_MONTH)
    else:
        date_rows = None

    perf_stats_arr = []

    # Optionally show the baseline as the first column.
    if show_baseline:
        perf_stats_arr.append(
            perf_stats(factor_returns, factor_returns=factor_returns)
        )
        names_arr = ['Baseline'] + list(names_arr)

    perf_stats_arr.extend(
        perf_stats(rets, factor_returns=factor_returns) for rets in returns_arr
    )

    perf_stats_all = pd.concat(perf_stats_arr, axis=1)

    # Percentage strings are about to be stored next to floats; switch to
    # object dtype up front instead of relying on an implicit upcast when a
    # string is written into a float column.
    if any(stat in STAT_FUNCS_PCT for stat in perf_stats_all.index):
        perf_stats_all = perf_stats_all.astype(object)

    for column in perf_stats_all.columns:
        # Series.items() replaces Series.iteritems(), removed in pandas 2.0.
        for stat, value in perf_stats_all[column].items():
            if stat in STAT_FUNCS_PCT:
                perf_stats_all.loc[stat, column] = str(np.round(value * 100, 3)) + '%'
    df = pd.DataFrame(perf_stats_all)
    df.columns = names_arr

    print_table(df, float_format='{0:.2f}'.format, header_rows=date_rows)

    return df
def xgb_results():
    """Train one gradient-boosting model per pair and collect out-of-sample stats.

    For every entry of the module-level ``pairs`` the features are built with
    ``gen_feat``, the last 1000 rows are held out as the test set, the 1000
    rows before them as the validation set, and the model returned by ``xgb``
    is fitted on the remainder. The fitted model is pickled to
    ``'<pair>_xgb.pkl'`` in the current working directory.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(validation_stats, test_stats)``; each has one row per pair and one
        column per statistic returned by ``perf_stats``.
    """
    import pickle

    df1 = pd.DataFrame()
    df2 = pd.DataFrame()
    for pair in pairs:
        # retrieving the data and preparing the features
        dataset = gen_feat(pair)
        dataset.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)

        # selecting the features to train on
        # NOTE(review): assumes the first two remaining columns are not
        # features (presumably 'ret' and 'dir') - confirm against gen_feat.
        cols = list(dataset.columns)
        feats = cols[2:]

        # splitting into training, validation and test sets
        train = dataset.iloc[:-1000, :].copy()
        test = dataset.iloc[-1000:, :].copy()
        train_f = train.iloc[:-1000, :]
        valid = train.iloc[-1000:, :]

        # training the algorithm
        m = xgb(train_f[feats], train_f['dir'])

        # Persist the fitted model. The previous code called ``.save`` on the
        # file-name string, which does not exist on ``str``.
        with open(f'{pair}_xgb.pkl', 'wb') as model_file:
            pickle.dump(m, model_file)

        # preparing results for both validation and test sets
        valid_pred = m.predict(valid[feats])
        test_pred = m.predict(test[feats])

        # Results for validation set: predictions scale the realised returns
        valid_results = perf_stats(valid_pred *
                                   valid['ret']).to_frame(name=pair)
        valid_results = valid_results.rename_axis('pairs')

        # Results for test set
        test_results = perf_stats(test_pred * test['ret']).to_frame(name=pair)
        test_results = test_results.rename_axis('pairs')

        # Merge
        df1 = pd.concat([df1, valid_results], axis=1)
        df2 = pd.concat([df2, test_results], axis=1)

    # output
    return df1.T, df2.T
Example #3
0
def create_tearsheet(close, signal, file_name, report_type, benchmark_rets=None):
    """Simulate a signal, save a returns tear sheet image and collect report data.

    The PnL is simulated with ``simulate_pnl`` using a fixed position size,
    once with and once without costs. For ``report_type == "primary"`` the
    benchmark passed in is replaced by the returns of a signal that is long
    everything. The tear sheet figure is written next to ``file_name`` with
    ``".json"`` replaced by ``"_<report_type>.png"``.

    Returns
    -------
    tuple
        ``(returns, report)`` where ``returns`` are the returns including
        costs and ``report`` is a dict of the figure file name, performance
        statistics with and without costs, the drawdown table, the normalized
        signal as CSV text, and the cost statistics.
    """
    logging.info(f"Creating {report_type} tearsheet for {file_name}")
    # Map long/short to long/flat
    # signal = (signal + 1) / 2
    position_size = 10000
    pnl, pnl_no_costs, cost_stats = simulate_pnl(close, signal, position_size)

    returns = calc_returns(pnl)
    returns.name = report_type.title()
    returns_wo_costs = calc_returns(pnl_no_costs)
    returns_wo_costs.name = report_type.title()

    if report_type == "primary":
        # Benchmark for the primary report: hold a long position in everything.
        always_long = pd.DataFrame(1, columns=signal.columns, index=signal.index)
        bench_pnl, _, _ = simulate_pnl(close, always_long, position_size)
        benchmark_rets = calc_returns(bench_pnl)
        benchmark_rets.name = "Benchmark (long all)"

    tear_sheet = pyfolio.create_returns_tear_sheet(
        returns, benchmark_rets=benchmark_rets, return_fig=True
    )
    fig_file_name = file_name.replace(".json", f"_{report_type}.png")
    tear_sheet.savefig(fig_file_name, bbox_inches="tight", pad_inches=0)

    stats_with_costs = perf_stats(returns)
    stats_no_costs = perf_stats(returns_wo_costs)
    drawdowns = gen_drawdown_table(returns, 5)

    # Downsample the signal to business days and, just in case, normalize
    # it to 1 for reporting.
    signal = signal.resample("1B").last()
    peak = signal.max().max()
    signal = signal / peak
    signal.plot()
    iso_index = signal.index.map(lambda stamp: stamp.isoformat())
    signal = signal.set_index(iso_index)

    report = {
        "fig_file_name": str(Path(fig_file_name).basename()),
        "p_stats": stats_with_costs.to_dict(),
        "p_stats_wo_costs": stats_no_costs.to_dict(),
        "dd_table": drawdowns.to_dict(),
        # CSVs are a lot more space-efficient for this dense 1500*50 table
        "signal": signal.to_csv(),
        "cost_stats": cost_stats,
    }
    return returns, report
Example #4
0
def backtest_stats(account_value, value_col_name="account_value"):
    """Compute, print and return performance statistics for an account history.

    Returns are derived with ``get_daily_return`` from the column named by
    ``value_col_name`` and handed to ``timeseries.perf_stats`` without
    positions or transactions.
    """
    stats_kwargs = dict(
        returns=get_daily_return(account_value, value_col_name=value_col_name),
        positions=None,
        transactions=None,
        turnover_denom="AGB",
    )
    summary = timeseries.perf_stats(**stats_kwargs)
    print(summary)
    return summary
Example #5
0
def strategy_performance(returns=None,
                         factor_returns=None,
                         positions=None,
                         transactions=None,
                         details=None):
    """Print a table of performance statistics and plot the returns.

    Parameters
    ----------
    returns, factor_returns, positions, transactions : optional
        Inputs forwarded to ``timeseries.perf_stats``. All four are
        overridden when ``details`` is given.
    details : pandas.DataFrame, optional
        When given, the inputs are taken from its columns: ``'return'``,
        ``'factor_return'``, the column named by the first value of
        ``'symbol'`` together with ``'cash'`` (positions), and
        ``'symbol'``, ``'price'``, ``'amount'`` (transactions).

    The statistics from ``timeseries.perf_stats`` are relabelled
    ('Annual...' -> 'Hourly...', 'Daily...' -> 'Tick...') and preceded by
    'Backtest days' and, if transactions are available, 'Trades/hour'.
    Transactions are only passed to ``plot_returns`` for backtests of at
    most 0.05 'Backtest days'.
    """
    if details is not None:
        returns, factor_returns = details['return'], details['factor_return']
        positions = details[[details['symbol'].iloc[0], 'cash']]
        transactions = details[['symbol', 'price', 'amount']]

    new_perf_stats = OrderedDict()
    new_perf_stats['Backtest days'] = round(len(returns) / (360 * 24), 4)
    if transactions is not None:
        # Count rows with a non-zero amount. Series.nonzero(), used here
        # before, was removed in pandas 1.0.
        n_trades = int((transactions['amount'] != 0).sum())
        new_perf_stats['Trades/hour'] = round(n_trades / 360, 4)

    perf_stats_all = timeseries.perf_stats(returns,
                                           factor_returns=factor_returns,
                                           positions=positions,
                                           transactions=transactions)

    for key in perf_stats_all.keys():
        if 'Annual' in key:
            label = 'Hourly' + key.replace('Annual', '')
        elif 'Daily' in key:
            label = 'Tick' + key.replace('Daily', '')
        else:
            label = key
        new_perf_stats[label] = perf_stats_all[key]

    stats_table = pd.DataFrame(pd.Series(new_perf_stats), columns=['Results'])

    # Percentage strings are stored next to floats; switch to object dtype
    # first so no string is written into a float column.
    # NOTE(review): matching is done on the relabelled names, so it depends
    # on what the module-level STAT_FUNCS_PCT contains - confirm.
    if any(stat in STAT_FUNCS_PCT for stat in stats_table.index):
        stats_table = stats_table.astype(object)

    for column in stats_table.columns:
        # Series.items() replaces Series.iteritems(), removed in pandas 2.0.
        for stat, value in stats_table[column].items():
            if stat in STAT_FUNCS_PCT:
                stats_table.loc[stat, column] = str(round(value * 100, 1)) + '%'

    utils.print_table(stats_table, fmt='{0:.2f}')
    plot_returns(returns,
                 factor_returns,
                 transactions=transactions
                 if new_perf_stats['Backtest days'] <= 0.05 else None)
Example #6
0
    def analyze(self, result):
        """Accumulate learning data and keep track of the best parameter set.

        Parameters
        ----------
        result : iterable of (parameter, results, learn_data)
            ``learn_data`` (if not ``None``) is appended to
            ``self.learn_data_``. ``results`` (if not ``None``) is converted
            to returns, positions and transactions with
            ``pf.utils.extract_rets_pos_txn_from_zipline``, its statistics
            are logged, and ``self.best_results_`` / ``self.parameter_`` are
            updated when ``self.object_function_accept_`` and
            ``self.object_function_better_`` both approve.
        """
        for parameter, results, learn_data in result:
            # A run without learning data must not discard what earlier runs
            # contributed, so only touch the accumulator when there is
            # something to add.
            if learn_data is not None:
                if self.learn_data_ is None:
                    self.learn_data_ = learn_data
                else:
                    self.learn_data_ = pd.concat([self.learn_data_, learn_data])

            if results is None:
                continue
            returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(results)

            perf_stats = timeseries.perf_stats(returns,
                                               positions=positions,
                                               transactions=transactions)

            logging.info("Parameter:%s", parameter)
            logging.info(perf_stats)
            logging.info("Sharpe Ratio:{}%".format(np.round(perf_stats.loc['Sharpe ratio'] * 100)))
            logging.info("")

            cur_results = (results, perf_stats)
            if self.object_function_accept_(cur_results) and self.object_function_better_(cur_results, self.best_results_):
                self.best_results_ = cur_results
                self.parameter_ = parameter
Example #7
0
def perf(df: pd.DataFrame,
         multiplier: int = 0,
         bankroll: float = 15000,
         output: bool = True,
         compound: bool = False,
         price_column_name: str = 'price',
         slippage: float = 0) -> NamedTuple:
    """
    Extract performance indicators from simulation done by other functions.

    Args:
        df:         must have columns: 'price', 'position', all the information
                    about when and at what price position is entered and closed
                    is extracted from those two columns
        multiplier: futures multiplier to be used in fixed capital simulation,
                    if not given or zero simulation will be variable capital
                    without leverage
        bankroll:   capital passed to the daily-return helpers in fixed
                    capital simulation, ignored for variable capital
                    simulation
        output:     whether output is to be printed out
        compound:   for fixed capital simulation whether position size should
                    be adjusted based on current balance, ignored for variable
                    capital simulation
        price_column_name: which column in df contains price data
        slippage:   transaction cost expressed as multiple of min-tick

    Returns:
        Named tuple 'Result' of objects for inspection:
        stats: Series of the statistics from perf_stats followed by the
               position statistics computed here
        daily: daily returns (includes closed positions and mark-to-market
               for open positions)
        positions: closed positions
        df: copy of the source df with the intermediate columns added
            (for debugging really)
        opens, closes: second and third element of the result of pos()

    Raises:
        AssertionError: if the pnl summed over positions and the pnl summed
                        over df rows differ after rounding to 4 decimals
    """
    df = df.copy()
    if price_column_name != 'price':
        df.rename(columns={price_column_name: 'price'}, inplace=True)

    if slippage:
        cost = get_min_tick(df.price) * slippage
    else:
        cost = 0

    df['transaction'] = (df['position'] -
                         df['position'].shift(1).fillna(0)).astype('int')

    df['slippage'] = df['transaction'].abs() * cost
    # A position still open on the last row is charged once more
    # (presumably the cost of closing it - confirm). Positional access and
    # the write both go through iloc: chained `df.slippage[-1] += ...` is
    # not guaranteed to modify df.
    last_position = df['position'].iloc[-1]
    if last_position != 0:
        df.iloc[-1, df.columns.get_loc('slippage')] += np.abs(last_position) * cost

    df['curr_price'] = (df['position'] - df['transaction']) * df['price']

    df['base_price'] = (df['price'].shift(1) *
                        df['position'].shift(1)).fillna(0)
    df['pnl'] = df['curr_price'] - df['base_price'] - df['slippage']

    slip_return = np.log((-df['slippage'] / df['price']) + 1).fillna(0)
    price_return = np.log((
        (df['curr_price'] - df['base_price']) / abs(df['base_price'])) +
                          1).fillna(0)
    df['lreturn'] = slip_return + price_return

    # get daily returns
    if multiplier:
        df['pnl_dollars'] = df['pnl'] * multiplier
        if compound:
            c = compound_pnl(df[['pnl_dollars', 'position', 'transaction']],
                             bankroll)
            df['size'] = c['size']  # for debugging only
            df['comp_pnl_dollars'] = c['comp_pnl_dollars']  # for debugging
            df['balance'] = c['balance']  # for debugging
            daily = daily_returns(c['comp_pnl_dollars'], bankroll)
        else:
            daily = daily_returns(df['pnl_dollars'], bankroll)
    else:
        daily = daily_returns_log_based(df)

    # get position stats
    if 'reason' in df.columns:
        p = pos(df['price'],
                df['transaction'],
                df['position'],
                df['reason'].shift(1),
                cost=cost)
    else:
        p = pos(df['price'], df['transaction'], df['position'], cost=cost)
    positions = p.positions
    # Sanity check: both pnl computations must agree.
    assert round(positions.pnl.sum(), 4) == round(df.pnl.sum(), 4), \
        f'Dubious pnl calcs... {positions.pnl.sum()} vs. {df.pnl.sum()}'

    if multiplier:
        positions['pnl'] = positions['pnl'] * multiplier

    duration = positions['duration'].mean()
    win_pos = positions[positions['pnl'] > 0]
    # positions with zero gain are loss making
    loss_pos = positions[positions['pnl'] <= 0]
    # =========================================

    # container for all non-pyfolio stats; object dtype because it mixes
    # numbers with a duration value
    stats = pd.Series(dtype=object)
    stats['Win percent'] = len(win_pos) / len(positions)
    stats['Average gain'] = win_pos.pnl.sum() / len(win_pos)
    stats['Average loss'] = loss_pos.pnl.sum() / len(loss_pos)
    stats['Avg gain/loss ratio'] = abs(stats['Average gain'] /
                                       stats['Average loss'])
    stats['Position EV'] = ((stats['Win percent'] * stats['Average gain']) + (
        (1 - stats['Win percent']) * stats['Average loss']))
    days = daily.returns.count()
    num_pos = len(win_pos) + len(loss_pos)
    stats['Positions per day'] = num_pos / days
    stats['Days per position'] = days / num_pos
    stats['Actual avg. duration'] = duration.round('min')

    stats['Days'] = days
    stats['Positions'] = num_pos
    stats['Trades'] = p.transactions
    stats['Monthly EV'] = (stats['Positions per day'] * stats['Position EV'] *
                           21)
    stats['Annual EV'] = 12 * stats['Monthly EV']

    # Generate output table; pd.concat replaces Series.append, which was
    # removed in pandas 2.0.
    pyfolio_stats = perf_stats(daily['returns'])
    stats = pd.concat([pyfolio_stats, stats])
    if output:
        print(stats.to_string())
        daily.path.plot(figsize=(20, 10), grid=True)
    Results = namedtuple('Result',
                         'stats, daily, positions, df, opens, closes')
    return Results(stats, daily, positions, df, p[1], p[2])
# Build a lookup from `df` restricted to `assets`; entries missing from `df`
# are filled with 'Unknown'.
# NOTE(review): `df` and `assets` are defined outside this excerpt -
# presumably `df` holds sector labels indexed by asset; confirm.
sector_map = df.reindex(assets).fillna('Unknown').to_dict()

# ### Benchmark

# In[18]:

# Percentage changes of the stored S&P 500 close prices serve as benchmark
# returns; the store is closed again when the `with` block exits.
with pd.HDFStore(HDF_PATH) as store:
    benchmark_rets = store['sp500/prices'].close.pct_change()
benchmark_rets.name = 'S&P500'
# Localize the index to UTC and keep only the labels present in
# `returns.index`.
benchmark_rets = benchmark_rets.tz_localize('UTC').filter(returns.index)
# Bare expression: only produces visible output in a notebook cell.
benchmark_rets.tail()

# In[19]:

# Performance statistics of the strategy relative to the benchmark; the
# result is not stored.
perf_stats(returns=returns,
           factor_returns=benchmark_rets,
           positions=positions,
           transactions=transactions)

# In[20]:

plot_perf_stats(returns=returns, factor_returns=benchmark_rets)

# ## Returns Analysis

# Testing a trading strategy involves backtesting against historical data to fine-tune alpha factor parameters, as well as forward-testing against new market data to validate that the strategy performs well out of sample or if the parameters are too closely tailored to specific historical circumstances.
#
# Pyfolio allows for the designation of an out-of-sample period to simulate walk-forward testing. There are numerous aspects to take into account when testing a strategy to obtain statistically reliable results, which we will address here.

# In[21]:

# Date string marking the start of the out-of-sample period described above.
oos_date = '2017-01-01'
Example #9
0
def stats_se(se):
    """Return performance statistics for a series, extended with VaR and drawdown.

    The statistics come from ``perf_stats`` on the series with missing values
    dropped. A ``'VaR'`` entry (5th percentile of the non-missing values) and
    the first row of ``gen_drawdown_table(se, top=1)`` are appended.
    """
    # Local import: the original block did not reference `pd` directly.
    import pandas as pd

    clean = se.dropna()
    stats = perf_stats(clean)
    stats['VaR'] = np.percentile(clean, 100 * 0.05)
    worst_drawdown = gen_drawdown_table(se, top=1).iloc[0, :]
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    return pd.concat([stats, worst_drawdown])