Example #1
import numpy as np
from sklearn.preprocessing import PowerTransformer
from statsmodels.tsa.seasonal import seasonal_decompose
from arch.bootstrap import MovingBlockBootstrap as MBB  # assumed import for the MBB alias below


def augmentation(X, Y, noise=False, bootstrapping=True,
                 noiseSTD=(0.1 / 2, 0.1 / 2, 0.01 / 2, 0.0002 / 2, 0.01 / 2, 0.02 / 2),
                 nr_boot=1000, bootstrap_bl_size=488, boot_freq=100):

    if noise:
        # Jitter every element with zero-mean Gaussian noise, scaled per feature.
        Xn = X.copy()
        for i, j, k in np.ndindex(X.shape):
            Xn[i, j, k] += np.random.normal(0, 1) * noiseSTD[k]

        X = np.vstack([X, Xn])
        Y = np.vstack([Y, Y])

    if bootstrapping:
        Xb = X.copy()
        pt = PowerTransformer(method='yeo-johnson', standardize=True)

        for i in range(Xb.shape[0]):
            pt.fit(Xb[i])
            transformed = pt.transform(Xb[i])
            # `freq` was renamed `period` in statsmodels >= 0.11.
            result = seasonal_decompose(transformed, model='additive',
                                        period=boot_freq)

            # Moving Block Bootstrap on the residuals; only the final
            # replicate survives this loop.
            bootstrapRes = MBB(bootstrap_bl_size, result.resid)
            for data in bootstrapRes.bootstrap(nr_boot):
                bs_x = data[0][0]

            reconSeriesYC = result.trend + result.seasonal + bs_x
            Xb[i] = pt.inverse_transform(reconSeriesYC)

        # The decomposition leaves NaNs at the series edges; fall back to the
        # original values there.
        for i, j, k in np.ndindex(X.shape):
            if np.isnan(Xb[i, j, k]):
                Xb[i, j, k] = X[i, j, k]
        X = np.vstack([X, Xb])
        Y = np.vstack([Y, Y])

    return X, Y
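
A minimal usage sketch, assuming X is shaped (samples, timesteps, features) with one noiseSTD entry per feature; the shapes and seed below are hypothetical:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(4, 200, 6))  # six features, matching the default noiseSTD
Y = rng.normal(size=(4, 1))

# Noise-only augmentation doubles the sample count.
X_aug, Y_aug = augmentation(X, Y, noise=True, bootstrapping=False)
print(X_aug.shape, Y_aug.shape)  # (8, 200, 6) (8, 1)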
Example #2
def test_uneven_sampling(bs_setup):
    bs = MovingBlockBootstrap(block_size=31, y=bs_setup.y_series, x=bs_setup.x_df)
    for _, kw in bs.bootstrap(10):
        assert kw["y"].shape == bs_setup.y_series.shape
        assert kw["x"].shape == bs_setup.x_df.shape
    bs = CircularBlockBootstrap(block_size=31, y=bs_setup.y_series, x=bs_setup.x_df)
    for _, kw in bs.bootstrap(10):
        assert kw["y"].shape == bs_setup.y_series.shape
        assert kw["x"].shape == bs_setup.x_df.shape
Example #3
    def test_uneven_sampling(self):
        bs = MovingBlockBootstrap(block_size=31, y=self.y_series, x=self.x_df)
        for _, kw in bs.bootstrap(10):
            assert kw['y'].shape == self.y_series.shape
            assert kw['x'].shape == self.x_df.shape
        bs = CircularBlockBootstrap(block_size=31, y=self.y_series, x=self.x_df)
        for _, kw in bs.bootstrap(10):
            assert kw['y'].shape == self.y_series.shape
            assert kw['x'].shape == self.x_df.shape
Example #4
from arch.bootstrap import MovingBlockBootstrap  # assumed import


def moving_block_bootstrap_method(X, Y, block_size=50, n_samples=50):
    """Resample X and Y jointly with the moving block bootstrap."""
    boot_samples = []
    bs = MovingBlockBootstrap(block_size, X, y=Y)

    for samp in bs.bootstrap(n_samples):
        # samp = (positional data, keyword data); keep the resampled X and Y.
        boot_samples.append((samp[0][0], samp[1]['y']))

    return boot_samples
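
A hypothetical usage sketch with synthetic series (length, seed, and parameters are assumptions):

import numpy as np

rng = np.random.default_rng(42)
X = rng.standard_normal(500)
Y = rng.standard_normal(500)

samples = moving_block_bootstrap_method(X, Y, block_size=50, n_samples=10)
print(len(samples))         # 10 resampled (X, Y) pairs
print(samples[0][0].shape)  # (500,): each resample keeps the original length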
Example #5
    def test_smoke(self):
        num_bootstrap = 20

        def func(y):
            return y.mean(axis=0)

        bs = StationaryBootstrap(13, self.y)
        cov = bs.cov(func, reps=num_bootstrap)
        bs = MovingBlockBootstrap(13, self.y)
        cov = bs.cov(func, reps=num_bootstrap)
        bs = CircularBlockBootstrap(13, self.y)
        cov = bs.cov(func, reps=num_bootstrap)
        bs = MovingBlockBootstrap(10, self.y)
        cov = bs.cov(func, reps=num_bootstrap)
        bs = CircularBlockBootstrap(10, self.y)
        cov = bs.cov(func, reps=num_bootstrap)
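
A standalone sketch of the cov usage these smoke tests exercise, with synthetic data (the shape and seed are assumptions):

import numpy as np
from arch.bootstrap import MovingBlockBootstrap


def func(y):
    return y.mean(axis=0)


y = np.random.default_rng(1).standard_normal((250, 2))
bs = MovingBlockBootstrap(13, y)
cov = bs.cov(func, reps=20)  # bootstrap covariance of the column means
print(cov.shape)             # (2, 2)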
Example #6
def test_str(bs_setup):
    bs = IIDBootstrap(bs_setup.y_series)
    expected = "IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)"
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>IID Bootstrap</strong>(" +
                "<strong>no. pos. inputs</strong>: 1, " +
                "<strong>no. keyword inputs</strong>: 0, " +
                "<strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)

    bs = StationaryBootstrap(10, bs_setup.y_series, bs_setup.x_df)
    expected = ("Stationary Bootstrap(block size: 10, no. pos. "
                "inputs: 2, no. keyword inputs: 0)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)

    bs = CircularBlockBootstrap(block_size=20,
                                y=bs_setup.y_series,
                                x=bs_setup.x_df)
    expected = ("Circular Block Bootstrap(block size: 20, no. pos. "
                "inputs: 0, no. keyword inputs: 2)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>Circular Block Bootstrap</strong>" +
                "(<strong>block size</strong>: 20, " +
                "<strong>no. pos. inputs</strong>: 0, " +
                "<strong>no. keyword inputs</strong>: 2," +
                " <strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)

    bs = MovingBlockBootstrap(block_size=20,
                              y=bs_setup.y_series,
                              x=bs_setup.x_df)
    expected = ("Moving Block Bootstrap(block size: 20, no. pos. "
                "inputs: 0, no. keyword inputs: 2)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>Moving Block Bootstrap</strong>" +
                "(<strong>block size</strong>: 20, " +
                "<strong>no. pos. inputs</strong>: 0, " +
                "<strong>no. keyword inputs</strong>: 2," +
                " <strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)
Example #7
    def test_str(self):
        bs = IIDBootstrap(self.y_series)
        expected = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
        assert_equal(str(bs), expected)
        expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
        assert_equal(bs.__repr__(), expected)
        expected = '<strong>IID Bootstrap</strong>(' + \
                   '<strong>no. pos. inputs</strong>: 1, ' + \
                   '<strong>no. keyword inputs</strong>: 0, ' + \
                   '<strong>ID</strong>: ' + hex(id(bs)) + ')'
        assert_equal(bs._repr_html(), expected)

        bs = StationaryBootstrap(10, self.y_series, self.x_df)
        expected = 'Stationary Bootstrap(block size: 10, no. pos. ' \
                   'inputs: 2, no. keyword inputs: 0)'
        assert_equal(str(bs), expected)
        expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
        assert_equal(bs.__repr__(), expected)

        bs = CircularBlockBootstrap(block_size=20,
                                    y=self.y_series,
                                    x=self.x_df)
        expected = 'Circular Block Bootstrap(block size: 20, no. pos. ' \
                   'inputs: 0, no. keyword inputs: 2)'
        assert_equal(str(bs), expected)
        expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
        assert_equal(bs.__repr__(), expected)
        expected = '<strong>Circular Block Bootstrap</strong>' + \
                   '(<strong>block size</strong>: 20, ' \
                   + '<strong>no. pos. inputs</strong>: 0, ' + \
                   '<strong>no. keyword inputs</strong>: 2,' + \
                   ' <strong>ID</strong>: ' + hex(id(bs)) + ')'
        assert_equal(bs._repr_html(), expected)

        bs = MovingBlockBootstrap(block_size=20, y=self.y_series, x=self.x_df)
        expected = 'Moving Block Bootstrap(block size: 20, no. pos. ' \
                   'inputs: 0, no. keyword inputs: 2)'
        assert_equal(str(bs), expected)
        expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
        assert_equal(bs.__repr__(), expected)
        expected = '<strong>Moving Block Bootstrap</strong>' + \
                   '(<strong>block size</strong>: 20, ' \
                   + '<strong>no. pos. inputs</strong>: 0, ' + \
                   '<strong>no. keyword inputs</strong>: 2,' + \
                   ' <strong>ID</strong>: ' + hex(id(bs)) + ')'
        assert_equal(bs._repr_html(), expected)
Example #8
    def mbb_bootstrap(self):
        """
        return paths simulated using the moving block bootstrap 

        params:
        -------

            - self: see above

        return:
        -------

            - none
        """

        print("\nMB BOOTSTRAP \n")
        bs = MovingBlockBootstrap(self.blocksize, self.data)
        out_mbb = boot(N_paths=self.n_paths,
                       method=bs,
                       obs_path=self.data,
                       add_noise=self.add_noise)
        if self.store_sim:
            # Keep at most the first 100 simulated paths.
            n_cols = min(out_mbb.shape[1], 100)
            self.simulated_paths['MBB'] = out_mbb.iloc[:, :n_cols]

        self.store_output = investment_horizons(
            observed_path=self.data,
            sims=out_mbb,
            investment_horizons=self.ih,
            freq=self.frequency,
            sum_stats=self.stats,
            perf_functions=self.perf_functions,
            store_output_dic=self.store_output,
            simulation_tech='MBB',
            plotting=self.plotting)

        return None
Example #9
def metrics():
    import hypothesisTest.bets as bets
    import hypothesisTest.data as data
    import numpy as np
    import hypothesisTest.plots as plots
    import hypothesisTest.helper_functions as hf
    import pandas as pd
    import math as mth
    from scipy import stats

    datasets = data.datasets_dict

    benchmark_returns = datasets['benchmark'][bets.clean_values_from_weights]
    rf_returns = datasets['rf_rate'][bets.clean_values_from_weights]

    fama_factors = datasets['Fama_French'][bets.clean_values_from_weights]

    cleaned_index = bets.cleaned_index_weights

    res_dict = dict()
    ##############################################################################################
    res_dict['cleaned_index'] = cleaned_index

    #A. General Characteristics
    #1. Time range
    res_dict['START_DATE'] = cleaned_index.min()
    res_dict['END_DATE'] = cleaned_index.max()
    res_dict['TIME_RANGE_DAYS'] = (
        (cleaned_index.max() -
         cleaned_index.min()).astype('timedelta64[D]')) / np.timedelta64(
             1, 'D')
    #years = ((end_date-start_date).astype('timedelta64[Y]'))/np.timedelta64(1, 'Y')
    res_dict['TOTAL_BARS'] = len(cleaned_index)

    #2. Average AUM
    res_dict['AVERAGE_AUM'] = np.nanmean(
        np.nansum(np.abs(bets.dollars_at_open), axis=1))

    #3. Capacity of Strategy

    #4. Leverage (!!! Double check -something to do with sum of long_lev and short_lev > 1)
    res_dict['AVERAGE_POSITION_SIZE'] = np.nanmean(
        np.nansum(bets.dollars_at_open, axis=1))

    res_dict['NET_LEVERAGE'] = round(
        res_dict['AVERAGE_POSITION_SIZE'] / res_dict['AVERAGE_AUM'], 2)

    #5. Turnover
    daily_shares = np.nansum(bets.purchased_shares, axis=1)
    daily_value_traded = np.nansum(np.abs(bets.dollars_at_open), axis=1)
    daily_turnover = daily_shares / (2 * daily_value_traded)
    res_dict['AVERAGE_DAILY_TURNOVER'] = np.mean(daily_turnover)

    #6. Correlation to underlying
    res_dict['CORRELATION_WITH_UNDERLYING'] = np.corrcoef(
        bets.underlying_daily_returns, bets.strategy_daily_returns)[0, 1]

    #7. Ratio of longs

    res_dict['LONG_RATIO'] = ((bets.cleaned_strategy_weights > 0).sum()) / (
        np.ones(bets.cleaned_strategy_weights.shape, dtype=bool).sum())

    #8. Maximum dollar position size

    res_dict['MAX_SIZE'] = np.nanmax(np.abs(bets.cleaned_strategy_weights))

    #9. Stability of Wealth Process

    cum_log_returns = np.log1p(bets.strategy_daily_returns).cumsum()
    rhat = stats.linregress(np.arange(len(cum_log_returns)),
                            cum_log_returns)[2]
    res_dict['STABILITY_OF_WEALTH_PROCESS'] = rhat**2

    ##############################################################################################
    # B. Performance measures
    #1. Equity curves
    def equity_curve(amount, ret):
        ret = hf.shift_array(ret, 1, 0)
        return amount * np.cumprod(1 + ret)

    curves = dict()
    curves['Strategy'] = equity_curve(bets.starting_value,
                                      bets.strategy_daily_returns)
    curves['Buy & Hold Underlying'] = equity_curve(
        bets.starting_value, bets.underlying_daily_returns)
    curves['Benchmark'] = equity_curve(bets.starting_value, benchmark_returns)
    curves['Risk free Asset'] = equity_curve(bets.starting_value, rf_returns)
    curves['Long Contribution'] = equity_curve(bets.starting_value,
                                               bets.long_contribution)
    curves['Short Contribution'] = equity_curve(bets.starting_value,
                                                bets.short_contribution)

    plot_data_DF1 = pd.DataFrame([])
    plot_data_DF1['time'] = cleaned_index
    plot_data_DF1['time'] = plot_data_DF1['time'].astype(np.int64) / int(1e6)
    plot_data_DF1['yValue'] = curves['Strategy']

    # plot_data_DF2 = pd.DataFrame([])
    # plot_data_DF2['time']=plot_data_DF1['time']
    # plot_data_DF2['yValue'] = curves['Benchmark']

    plotData1 = [[plot_data_DF1['time'][n], curves['Strategy'][n]]
                 for n in range(len(cleaned_index))]
    plotData2 = [[plot_data_DF1['time'][n], curves['Benchmark'][n]]
                 for n in range(len(cleaned_index))]
    # for n in range(len(cleaned_index)):
    #     plotData1.append([plot_data_DF1['time'][n], curves['Strategy'][n]])
    #     plotData2.append([plot_data_DF1['time'][n], curves['Benchmark'][n]])

    # plots.equity_curves_plot(cleaned_index, curves)

    res_dict['curves'] = curves

    #2. Pnl from long positions check long_pnl
    res_dict['PNL_FROM_STRATEGY'] = curves['Strategy'][-1]
    res_dict['PNL_FROM_LONG'] = curves['Long Contribution'][-1]

    #3. Annualized rate of return (Check this)
    res_dict['ANNUALIZED_AVERAGE_RATE_OF_RETURN'] = round(
        ((1 + np.mean(bets.strategy_daily_returns))**(365) - 1) * 100, 2)
    res_dict['CUMMULATIVE_RETURN'] = (
        np.cumprod(1 + bets.strategy_daily_returns)[-1] - 1)

    yrs = res_dict['TOTAL_BARS'] / 252
    res_dict['CAGR_STRATEGY'] = (
        (curves['Strategy'][-1] / curves['Strategy'][0])**(1 / yrs)) - 1
    res_dict['CAGR_BENCHMARK'] = (
        (curves['Benchmark'][-1] / curves['Benchmark'][0])**(1 / yrs)) - 1
    #4. Hit Ratio

    res_dict['HIT_RATIO'] = round(
        ((bets.daily_pnl > 0).sum()) / ((bets.daily_pnl > 0).sum() +
                                        (bets.daily_pnl < 0).sum() +
                                        (bets.daily_pnl == 0).sum()) * 100, 2)

    ##############################################################################################
    # C. Runs
    # 1. Runs concentration
    def runs(returns):
        wght = returns / returns.sum()
        hhi = (wght**2).sum()
        hhi = (hhi - returns.shape[0]**-1) / (1. - returns.shape[0]**-1)
        return hhi

    res_dict['HHI_PLUS'] = runs(
        bets.strategy_daily_returns[bets.strategy_daily_returns > 0])
    res_dict['HHI_MINUS'] = runs(
        bets.strategy_daily_returns[bets.strategy_daily_returns < 0])

    # 2. Drawdown and Time under Water

    def MDD(returns):
        def returns_to_dollars(amount, ret):
            return amount * np.cumprod(1 + ret)

        doll_series = pd.Series(returns_to_dollars(100, returns))

        Roll_Max = doll_series.cummax()
        Daily_Drawdown = doll_series / Roll_Max - 1.0
        Max_Daily_Drawdown = Daily_Drawdown.cummin()
        return Max_Daily_Drawdown

    DD_strategy = MDD(bets.strategy_daily_returns)
    DD_benchmark = MDD(benchmark_returns)
    res_dict['MDD_STRATEGY'] = DD_strategy.min()
    res_dict['MDD_BENCHMARK'] = DD_benchmark.min()

    #3. 95 percentile
    res_dict['95PERCENTILE_DRAWDOWN_STRATEGY'] = DD_strategy.quantile(0.05)
    res_dict['95PERCENTILE_DRAWDOWN_BENCHMARK'] = DD_benchmark.quantile(0.05)

    #############################################################################################
    # D. Efficiency

    #1. Sharpe Ratio
    excess_returns = bets.strategy_daily_returns - rf_returns
    res_dict['SHARPE_RATIO'] = round(
        mth.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns), 2)

    #from statsmodels.graphics.tsaplots import plot_acf
    #plot_acf(excess_returns)
    #2. Sortino Ratio
    res_dict['SORTINO_RATIO'] = mth.sqrt(252) * np.mean(
        excess_returns) / np.std(
            excess_returns[excess_returns < np.mean(excess_returns)])

    #3. Probabilistic Sharpe Ratio
    from scipy.stats import norm
    from scipy.stats import kurtosis, skew
    g_3 = skew(excess_returns)
    g_4 = kurtosis(excess_returns)
    res_dict['PROBABILISTIC_SHARPE_RATIO'] = norm.cdf(
        ((res_dict['SHARPE_RATIO'] - 2) * mth.sqrt(len(excess_returns) - 1)) /
        (mth.sqrt(1 - (g_3 * res_dict['SHARPE_RATIO']) +
                  (0.25 * (g_4 - 1) * res_dict['SHARPE_RATIO'] *
                   res_dict['SHARPE_RATIO']))))

    #4. Information Ratio
    excess_returns_benchmark = bets.strategy_daily_returns - benchmark_returns
    res_dict['INFORMATION_RATIO'] = mth.sqrt(252) * np.mean(
        excess_returns_benchmark) / np.std(excess_returns_benchmark)

    #5. t_stat & P-value
    m = np.mean(excess_returns)
    s = np.std(excess_returns)
    n = len(excess_returns)
    t_stat = (m / s) * mth.sqrt(n)
    res_dict['t_STATISTIC'] = t_stat

    # Two-sided, as we're looking at <> 0; df for a one-sample t-test is n - 1.
    pval = stats.t.sf(np.abs(t_stat), n - 1) * 2

    res_dict['p-VALUE'] = round(pval * 100, 2)
    if pval <= 0.0001:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'STATISTICALLY_SIGNIFICANT'
    else:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'NOT_STATISTICALLY_SIGNIFICANT'

    #6. Omega Ratio (!!! double-check: 100**(1/252) - 1 implies a ~9900% annual threshold)
    returns_less_thresh = excess_returns - (((100)**(1 / 252)) - 1)
    numer = sum(returns_less_thresh[returns_less_thresh > 0.0])
    denom = -1.0 * sum(returns_less_thresh[returns_less_thresh < 0.0])
    res_dict['OMEGA_RATIO'] = numer / denom

    #7. Tail Ratio
    res_dict['TAIL_RATIO'] = np.abs(
        np.percentile(bets.strategy_daily_returns, 95)) / np.abs(
            np.percentile(bets.strategy_daily_returns, 5))

    #8. Rachev Ratio
    left_threshold = np.percentile(excess_returns, 5)
    right_threshold = np.percentile(excess_returns, 95)
    CVAR_left = -1 * (np.nanmean(
        excess_returns[excess_returns <= left_threshold]))
    CVAR_right = (np.nanmean(
        excess_returns[excess_returns >= right_threshold]))
    res_dict['RACHEV_RATIO'] = CVAR_right / CVAR_left
    #############################################################################################
    # E. RISK MEASURES

    #1. SKEWNESS, KURTOSIS
    res_dict['SKEWNESS'] = stats.skew(bets.strategy_daily_returns, bias=False)
    res_dict['KURTOSIS'] = stats.kurtosis(bets.strategy_daily_returns,
                                          bias=False)

    #2. ANNUALIZED VOLATILITY
    res_dict['ANNUALIZED_VOLATILITY'] = np.std(
        bets.strategy_daily_returns) * np.sqrt(252)

    #3. MAR Ratio
    res_dict['MAR_RATIO'] = (res_dict['CAGR_STRATEGY']) / abs(
        res_dict['MDD_STRATEGY'])

    #############################################################################################
    # F. Classification scores

    sign_positions = np.sign(bets.purchased_shares).flatten()
    sign_profits = np.sign(bets.pnl).flatten()

    invalid = np.argwhere(np.isnan(sign_positions + sign_profits))

    sign_positions_final = np.delete(sign_positions, invalid)
    sign_profits_final = np.delete(sign_profits, invalid)

    from sklearn.metrics import precision_recall_fscore_support as score
    precision, recall, fscore, support = score(sign_profits_final,
                                               sign_positions_final)
    # Round to 5 decimals (the original int16 truncation overflows above 0.32767).
    precision = np.round(precision, 5)
    recall = np.round(recall, 5)
    fscore = np.round(fscore, 5)
    support = np.round(support, 5)

    res_dict['CLASSIFICATION_DATA'] = {
        'Class': ['-1', '0', '1'],
        'Precision': list(precision),
        'Recall': list(recall),
        'F-Score': list(fscore),
        'Support': list(support)
    }

    #############################################################################################
    # G. Factor Analysis
    import statsmodels.formula.api as sm  # module for stats models
    from statsmodels.iolib.summary2 import summary_col

    def assetPriceReg(excess_ret, fama):

        df_stock_factor = pd.DataFrame({
            'ExsRet': excess_ret,
            'MKT': fama[:, 0],
            'SMB': fama[:, 1],
            'HML': fama[:, 2],
            'RMW': fama[:, 3],
            'CMA': fama[:, 4]
        })

        CAPM = sm.ols(formula='ExsRet ~ MKT',
                      data=df_stock_factor).fit(cov_type='HAC',
                                                cov_kwds={'maxlags': 1})
        FF3 = sm.ols(formula='ExsRet ~ MKT + SMB + HML',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})
        FF5 = sm.ols(formula='ExsRet ~ MKT + SMB + HML + RMW + CMA',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})

        CAPMtstat = CAPM.tvalues
        FF3tstat = FF3.tvalues
        FF5tstat = FF5.tvalues

        CAPMcoeff = CAPM.params
        FF3coeff = FF3.params
        FF5coeff = FF5.params

        # DataFrame with coefficients and t-stats
        results_df = pd.DataFrame(
            {
                'CAPMcoeff': CAPMcoeff,
                'CAPMtstat': CAPMtstat,
                'FF3coeff': FF3coeff,
                'FF3tstat': FF3tstat,
                'FF5coeff': FF5coeff,
                'FF5tstat': FF5tstat
            },
            index=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])

        dfoutput = summary_col(
            [CAPM, FF3, FF5],
            stars=True,
            float_format='%0.4f',
            model_names=[
                'CAPM', 'Fama-French 3 Factors', 'Fama-French 5 factors'
            ],
            info_dict={
                'N': lambda x: "{0:d}".format(int(x.nobs)),
                'Adjusted R2': lambda x: "{:.4f}".format(x.rsquared_adj)
            },
            regressor_order=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])

        print(dfoutput)

        return dfoutput, results_df

    res_dict['FACTOR_RES'], _ = assetPriceReg(excess_returns, fama_factors)

    #############################################################################################
    # H. Bootstrap Stats
    # 1. Sharpe Bootstrap
    from arch.bootstrap import MovingBlockBootstrap
    from numpy.random import RandomState
    bs_sharpe = MovingBlockBootstrap(5,
                                     excess_returns,
                                     random_state=RandomState(1234))

    def sharpe(y):
        return (mth.sqrt(252) * np.mean(y)) / np.std(y)

    res = bs_sharpe.apply(sharpe, 10000)
    # plots.density_plot_bootstrap(res,res_dict['SHARPE_RATIO'])

    ############################################################################################

    return res_dict, [plotData1, plotData2]

def get_MBB_reminders(num_samples, remainder):
    """Return num_samples moving-block-bootstrap draws of a remainder series."""
    import numpy as np
    from arch.bootstrap import MovingBlockBootstrap as MBB  # assumed imports

    reminders = np.zeros((num_samples, remainder.size))
    bs = MBB(3, remainder)
    for i, data in enumerate(bs.bootstrap(num_samples)):
        reminders[i] = data[0][0]  # element [0][0] is the resampled series
    return reminders
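
# A hypothetical usage sketch for get_MBB_reminders with a synthetic
# remainder series (the length and seed below are assumptions):
if __name__ == '__main__':
    import numpy as np
    remainder = np.random.default_rng(7).standard_normal(120)
    draws = get_MBB_reminders(num_samples=5, remainder=remainder)
    print(draws.shape)  # (5, 120): one bootstrapped remainder path per row
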
def metrics(datasets_dict, clean_values_from_weights, cleaned_index_weights,
            daily_pnl, pnl, strategy_log_returns, dollars_at_open,
            purchased_shares, underlying_daily_returns,
            cleaned_strategy_weights, starting_value, long_contribution,
            short_contribution, strategy_daily_returns):

    import numpy as np
    import hypothesisTest.utilities as hf
    import pandas as pd
    import math as mth
    from scipy import stats
    from scipy.stats import norm
    from scipy.stats import kurtosis, skew

    datasets = datasets_dict

    benchmark_returns = datasets['benchmark'][clean_values_from_weights]
    rf_returns = datasets['rf_rate'][clean_values_from_weights]
    fama_factors = datasets['Fama_French'][clean_values_from_weights]

    cleaned_index = cleaned_index_weights
    excess_returns = strategy_log_returns - rf_returns

    res_dict = dict()

    ##############################################################################################
    #A. General Characteristics
    #1. Time
    res_dict['START_DATE'] = hf.to_datetime(
        cleaned_index.min()).strftime("%d %B, %Y")

    res_dict['END_DATE'] = hf.to_datetime(
        cleaned_index.max()).strftime("%d %B, %Y")

    res_dict['TIME_RANGE_DAYS'] = '{0:.0f} days'.format(
        int(((cleaned_index.max() -
              cleaned_index.min()).astype('timedelta64[D]')) /
            np.timedelta64(1, 'D')))

    res_dict['TOTAL_BARS'] = '{0:.0f} bars'.format(int(len(cleaned_index)))

    sr = mth.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns)

    def minTRL(sharpe, skew, kurtosis, target_sharpe=0, prob=0.95):
        from scipy.stats import norm
        min_track = (1 + (1 - skew * sharpe + sharpe**2 *
                          (kurtosis - 1) / 4.0) *
                     (norm.ppf(prob) / (sharpe - target_sharpe))**2)
        return min_track

    g_3 = skew(excess_returns)
    g_4 = kurtosis(excess_returns)

    res_dict['MIN_TRL_SRGE1_99%'] = '{0:.0f} bars or {1:.2f} years'.format(
        minTRL(sr, g_3, g_4, 1, 0.99),
        minTRL(sr, g_3, g_4, 1, 0.99) / 252)

    res_dict['MIN_TRL_SRGE2_99%'] = '{0:.0f} bars or {1:.2f} years'.format(
        minTRL(sr, g_3, g_4, 2, 0.99),
        minTRL(sr, g_3, g_4, 2, 0.99) / 252)

    res_dict['MIN_TRL_SRGE3_99%'] = '{0:.0f} bars or {1:.2f} years'.format(
        minTRL(sr, g_3, g_4, 3, 0.99),
        minTRL(sr, g_3, g_4, 3, 0.99) / 252)

    #2. Average AUM
    avg_aum = np.nanmean(np.nansum(np.abs(dollars_at_open), axis=1))
    res_dict['AVERAGE_AUM'] = hf.millions_fmt(avg_aum)

    #3. Capacity of Strategy

    #    res_dict['STRATEGY_CAPACITY']       = hf.millions_fmt(0)

    #4. Leverage (!!! Double check -something to do with sum of long_lev and short_lev > 1)
    avg_pos_size = np.nanmean(np.nansum(dollars_at_open, axis=1))
    res_dict['AVERAGE_POSITION_SIZE'] = hf.millions_fmt(avg_pos_size)

    res_dict['NET_LEVERAGE'] = '{0:.1f}'.format(avg_pos_size / avg_aum)

    #5. Turnover
    daily_shares = np.nansum(purchased_shares, axis=1)
    daily_value_traded = np.nansum(np.abs(dollars_at_open), axis=1)
    daily_turnover = daily_shares / (2 * daily_value_traded)

    res_dict['AVERAGE_DAILY_TURNOVER'] = hf.millions_fmt(
        np.mean(daily_turnover))

    #6. Correlation to underlying
    res_dict['CORRELATION_WITH_UNDERLYING'] = '{0:.2f}'.format(
        np.corrcoef(underlying_daily_returns, strategy_log_returns)[0, 1])

    #7. Ratio of longs

    res_dict['LONG_RATIO'] = '{0:.2f} %'.format(
        (((cleaned_strategy_weights > 0).sum()) /
         (np.ones(cleaned_strategy_weights.shape, dtype=bool).sum())) * 100)

    #8. Maximum dollar position size

    res_dict['MAX_SIZE'] = '{0:.2f} %'.format(
        np.nanmax(np.abs(cleaned_strategy_weights)) * 100)

    #9. Stability of Wealth Process

    cum_log_returns = np.log1p(strategy_log_returns).cumsum()
    rhat = stats.linregress(np.arange(len(cum_log_returns)),
                            cum_log_returns)[2]

    res_dict['STABILITY_OF_WEALTH_PROCESS'] = '{0:.2f} %'.format(
        (rhat**2) * 100)

    ##############################################################################################
    # B. Performance measures
    #1. Equity curves
    def equity_curve(amount, ret):
        ret = hf.shift_array(ret, 1, 0)
        return amount * np.cumprod(1 + ret)

    curves = dict()
    curves['Strategy'] = equity_curve(starting_value, strategy_daily_returns)
    curves['Buy & Hold Underlying'] = equity_curve(starting_value,
                                                   underlying_daily_returns)
    curves['Benchmark'] = equity_curve(starting_value, benchmark_returns)
    curves['Risk free Asset'] = equity_curve(starting_value, rf_returns)
    curves['Long Contribution'] = equity_curve(starting_value,
                                               long_contribution)
    curves['Short Contribution'] = equity_curve(starting_value,
                                                short_contribution)

    curves['time_index'] = cleaned_index
    df = pd.DataFrame.from_dict(curves)
    df = df.set_index('time_index')
    res_dict['PLOT_CURVES_DATA'] = df

    #2. PnL from long positions (check long_pnl)
    res_dict['PNL_FROM_STRATEGY'] = hf.millions_fmt(curves['Strategy'][-1])

    res_dict['PNL_FROM_LONG'] = hf.millions_fmt(
        curves['Long Contribution'][-1])

    #3. Annualized rate of return (Check this)
    res_dict['ANNUALIZED_MEAN_RETURN'] = '{0:.2f} %'.format(
        (((1 + np.mean(strategy_daily_returns))**(365) - 1)) * 100)

    res_dict['CUMMULATIVE_RETURN'] = '{0:.2f} %'.format(
        (np.cumprod(1 + strategy_daily_returns)[-1] - 1) * 100)

    yrs = int(len(cleaned_index)) / 252

    cagr_strategy = ((
        (curves['Strategy'][-1] / curves['Strategy'][0])**(1 / yrs)) - 1)

    res_dict['CAGR_STRATEGY'] = '{0:.2f} %'.format(
        (((curves['Strategy'][-1] / curves['Strategy'][0])**(1 / yrs)) - 1) *
        100)

    res_dict['CAGR_BENCHMARK'] = '{0:.2f} %'.format(
        (((curves['Benchmark'][-1] / curves['Benchmark'][0])**(1 / yrs)) - 1) *
        100)

    #4. Hit Ratio

    res_dict['HIT_RATIO'] = '{0:.2f} %'.format(
        (((daily_pnl > 0).sum()) /
         ((daily_pnl > 0).sum() + (daily_pnl < 0).sum() +
          (daily_pnl == 0).sum())) * 100)

    ##############################################################################################
    # C. Runs
    # 1. Runs concentration

    def runs(returns):
        wght = returns / returns.sum()
        hhi = (wght**2).sum()
        hhi = (hhi - returns.shape[0]**-1) / (1. - returns.shape[0]**-1)
        return hhi

    res_dict['HHI_PLUS'] = '{0:.5f}'.format(
        runs(strategy_log_returns[strategy_log_returns > 0]))

    res_dict['HHI_MINUS'] = '{0:.5f}'.format(
        runs(strategy_log_returns[strategy_log_returns < 0]))

    # 2. Drawdown and Time under Water

    def MDD(returns):
        def returns_to_dollars(amount, ret):
            return amount * np.cumprod(1 + ret)

        doll_series = pd.Series(returns_to_dollars(100, returns))

        Roll_Max = doll_series.cummax()
        Daily_Drawdown = doll_series / Roll_Max - 1.0
        Max_Daily_Drawdown = Daily_Drawdown.cummin()
        return Max_Daily_Drawdown

    DD_strategy = MDD(strategy_log_returns)
    DD_benchmark = MDD(benchmark_returns)

    mdd_strat = DD_strategy.min()
    res_dict['MDD_STRATEGY'] = '{0:.2f} %'.format(DD_strategy.min() * 100)
    res_dict['MDD_BENCHMARK'] = '{0:.2f} %'.format(DD_benchmark.min() * 100)

    #3. 95 percentile
    #    res_dict['95PERCENTILE_DRAWDOWN_STRATEGY']=DD_strategy.quantile(0.05)
    #    res_dict['95PERCENTILE_DRAWDOWN_BENCHMARK']=DD_benchmark.quantile(0.05)

    #############################################################################################
    # D. Efficiency

    #1. Sharpe Ratio
    excess_returns = strategy_log_returns - rf_returns

    res_dict['SHARPE_RATIO'] = '{0:.2f}'.format(
        mth.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns))

    res_dict['PROBABILISTIC_SR_GE_1'] = '{0:.2f} %'.format((norm.cdf(
        ((sr - 1) * mth.sqrt((len(excess_returns) - 1) / 252)) /
        (mth.sqrt(1 - (g_3 * sr) + (0.25 * (g_4 - 1) * sr * sr))))) * 100)
    res_dict['PROBABILISTIC_SR_GE_2'] = '{0:.2f} %'.format((norm.cdf(
        ((sr - 2) * mth.sqrt((len(excess_returns) - 1) / 252)) /
        (mth.sqrt(1 - (g_3 * sr) + (0.25 * (g_4 - 1) * sr * sr))))) * 100)
    res_dict['PROBABILISTIC_SR_GE_3'] = '{0:.2f} %'.format((norm.cdf(
        ((sr - 3) * mth.sqrt((len(excess_returns) - 1) / 252)) /
        (mth.sqrt(1 - (g_3 * sr) + (0.25 * (g_4 - 1) * sr * sr))))) * 100)

    #2. Sortino Ratio

    res_dict['SORTINO_RATIO'] = '{0:.2f}'.format(
        mth.sqrt(252) * np.mean(excess_returns) /
        np.std(excess_returns[excess_returns < np.mean(excess_returns)]))

    #3. Information Ratio
    excess_returns_benchmark = strategy_log_returns - benchmark_returns
    res_dict['INFORMATION_RATIO'] = '{0:.2f}'.format(
        mth.sqrt(252) * np.mean(excess_returns_benchmark) /
        np.std(excess_returns_benchmark))

    #4. t_stat & P-value
    m = np.mean(excess_returns)
    s = np.std(excess_returns)
    n = len(excess_returns)
    t_stat = (m / s) * mth.sqrt(n)

    res_dict['t_STATISTIC'] = '{0:.2f}'.format(t_stat)

    # Two-sided, as we're looking at <> 0; df for a one-sample t-test is n - 1.
    pval = stats.t.sf(np.abs(t_stat), n - 1) * 2

    res_dict['p-VALUE'] = '{0:.5f} %'.format(pval * 100)

    if pval <= 0.0001:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'STATISTICALLY_SIGNIFICANT'
    else:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'NOT_STATISTICALLY_SIGNIFICANT'

    #5. Omega Ratio (!!! double-check: 100**(1/252) - 1 implies a ~9900% annual threshold)

    returns_less_thresh = excess_returns - (((100)**(1 / 252)) - 1)
    numer = sum(returns_less_thresh[returns_less_thresh > 0.0])
    denom = -1.0 * sum(returns_less_thresh[returns_less_thresh < 0.0])
    res_dict['OMEGA_RATIO'] = '{0:.2f}'.format(numer / denom)

    #6. Tail Ratio

    res_dict['TAIL_RATIO'] = '{0:.2f}'.format(
        np.abs(np.percentile(strategy_log_returns, 95)) /
        np.abs(np.percentile(strategy_log_returns, 5)))

    #7. Rachev Ratio
    left_threshold = np.percentile(excess_returns, 5)
    right_threshold = np.percentile(excess_returns, 95)
    CVAR_left = -1 * (np.nanmean(
        excess_returns[excess_returns <= left_threshold]))
    CVAR_right = (np.nanmean(
        excess_returns[excess_returns >= right_threshold]))

    res_dict['RACHEV_RATIO'] = '{0:.2f}'.format(CVAR_right / CVAR_left)

    #############################################################################################
    # E. RISK MEASURES

    #1. SKEWNESS, KURTOSIS
    res_dict['SKEWNESS'] = '{0:.2f}'.format(
        stats.skew(strategy_log_returns, bias=False))
    res_dict['KURTOSIS'] = '{0:.2f}'.format(
        stats.kurtosis(strategy_log_returns, bias=False))

    #2. ANNUALIZED VOLATILITY
    res_dict['ANNUALIZED_VOLATILITY'] = '{0:.2f} %'.format(
        np.std(strategy_log_returns) * np.sqrt(252) * 100)

    #3. MAR Ratio
    res_dict['MAR_RATIO'] = '{0:.2f}'.format((cagr_strategy) / abs(mdd_strat))

    #4. Tracking Error
    res_dict['TRACKING_ERROR'] = '{0:.4f}'.format(
        np.std(strategy_log_returns - benchmark_returns, ddof=1))

    #5. Value at Risk (99.9%)
    percentile = 0.001
    res_dict['VaR_99.9'] = '{0:.3f} %'.format(
        np.percentile(strategy_log_returns, percentile * 100) * 100)

    #############################################################################################
    # F. Classification scores

    sign_positions = np.sign(purchased_shares).flatten()
    sign_profits = np.sign(pnl).flatten()

    invalid = np.argwhere(np.isnan(sign_positions + sign_profits))

    sign_positions_final = np.delete(sign_positions, invalid)
    sign_profits_final = np.delete(sign_profits, invalid)

    from sklearn.metrics import precision_recall_fscore_support as score
    precision, recall, fscore, support = score(sign_profits_final,
                                               sign_positions_final)

    decimals = 3
    precision = np.round(precision, decimals)
    recall = np.round(recall, decimals)
    fscore = np.round(fscore, decimals)
    support = np.round(support, decimals)
    try:
        res_dict['CLASSIFICATION_DATA'] = {
            'Class': ['-1', '0', '1'],
            'Precision': list(precision),
            'Recall': list(recall),
            'F-Score': list(fscore),
            'Support': list(support)
        }
        res_dict['CLASSIFICATION_DATA'] = pd.DataFrame(
            res_dict['CLASSIFICATION_DATA'])
        res_dict['CLASSIFICATION_DATA'] = res_dict[
            'CLASSIFICATION_DATA'].set_index('Class')
    except Exception:
        # Fall back to NaN placeholders (e.g. when a class is absent from the data).
        res_dict['CLASSIFICATION_DATA'] = {
            'Class': ['-1', '0', '1'],
            'Precision': ['NaN', 'NaN', 'NaN'],
            'Recall': ['NaN', 'NaN', 'NaN'],
            'F-Score': ['NaN', 'NaN', 'NaN'],
            'Support': ['NaN', 'NaN', 'NaN']
        }
        res_dict['CLASSIFICATION_DATA'] = pd.DataFrame(
            res_dict['CLASSIFICATION_DATA'])
        res_dict['CLASSIFICATION_DATA'] = res_dict[
            'CLASSIFICATION_DATA'].set_index('Class')

    #############################################################################################
    # G. Factor Analysis
    import statsmodels.formula.api as sm  # module for stats models
    from statsmodels.iolib.summary2 import summary_col

    def assetPriceReg(excess_ret, fama, t_decimals, coeff_decimals):

        df_stock_factor = pd.DataFrame({
            'ExsRet': excess_ret,
            'MKT': fama[:, 0],
            'SMB': fama[:, 1],
            'HML': fama[:, 2],
            'RMW': fama[:, 3],
            'CMA': fama[:, 4]
        })

        CAPM = sm.ols(formula='ExsRet ~ MKT',
                      data=df_stock_factor).fit(cov_type='HAC',
                                                cov_kwds={'maxlags': 1})
        FF3 = sm.ols(formula='ExsRet ~ MKT + SMB + HML',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})
        FF5 = sm.ols(formula='ExsRet ~ MKT + SMB + HML + RMW + CMA',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})

        CAPMtstat = np.round(CAPM.tvalues, t_decimals)
        FF3tstat = np.round(FF3.tvalues, t_decimals)
        FF5tstat = np.round(FF5.tvalues, t_decimals)

        CAPMcoeff = np.round(CAPM.params, coeff_decimals)
        FF3coeff = np.round(FF3.params, coeff_decimals)
        FF5coeff = np.round(FF5.params, coeff_decimals)

        # DataFrame with coefficients and t-stats
        results_df = pd.DataFrame(
            {
                'CAPM_coeff': CAPMcoeff,
                'CAPM_tstat': CAPMtstat,
                'FF3_coeff': FF3coeff,
                'FF3_tstat': FF3tstat,
                'FF5_coeff': FF5coeff,
                'FF5_tstat': FF5tstat
            },
            index=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])

        dfoutput = summary_col(
            [CAPM, FF3, FF5],
            stars=True,
            float_format='%0.4f',
            model_names=[
                'CAPM', 'Fama-French 3 Factors', 'Fama-French 5 factors'
            ],
            info_dict={
                'N': lambda x: "{0:d}".format(int(x.nobs)),
                'Adjusted R2': lambda x: "{:.4f}".format(x.rsquared_adj)
            },
            regressor_order=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])

        return dfoutput, results_df

    _, res_dict['FACTOR_RES'] = assetPriceReg(excess_returns, fama_factors, 2,
                                              5)

    #############################################################################################
    # H. Bootstrap Stats
    # 1. Sharpe Bootstrap
    from arch.bootstrap import MovingBlockBootstrap
    from numpy.random import RandomState

    def geom_mean(y):
        log_ret = np.log(1 + y)
        geom = np.exp(np.sum(log_ret) / len(log_ret)) - 1
        return geom

    geo_avg = geom_mean(strategy_daily_returns)
    detrended_ret = strategy_daily_returns - geo_avg
    bs_sharpe = MovingBlockBootstrap(5,
                                     detrended_ret,
                                     random_state=RandomState(1234))

    res = bs_sharpe.apply(geom_mean, 10000)
    #    plots.density_plot_bootstrap(res,geo_avg)
    p_val = (res <= geo_avg).sum() / len(res)
    res_dict['SHARPE_BS'] = res
    res_dict['SHARPE_BS_GEOM_AVG'] = str(round(geo_avg, 5))
    res_dict['GM_BOOTSTRAP_p_val'] = '{0:.3f} %'.format(p_val * 100)

    ############################################################################################

    return res_dict
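
The moving-block-bootstrap step in section H above can be exercised on its own. The sketch below mirrors the block size, seed, and replication count from the code, but the returns are synthetic (their mean and volatility are assumptions):

import numpy as np
from numpy.random import RandomState
from arch.bootstrap import MovingBlockBootstrap


def geom_mean(y):
    log_ret = np.log(1 + y)
    return np.exp(np.sum(log_ret) / len(log_ret)) - 1


returns = np.random.default_rng(0).normal(0.0004, 0.01, size=1000)

geo_avg = geom_mean(returns)
detrended_ret = returns - geo_avg  # recenter so the statistic is ~0 under the null
bs = MovingBlockBootstrap(5, detrended_ret, random_state=RandomState(1234))
res = bs.apply(geom_mean, 10000)   # bootstrap distribution of the geometric mean

p_val = (res <= geo_avg).sum() / len(res)
print('geometric mean: {0:.5f}, p-value: {1:.3f} %'.format(geo_avg, p_val * 100))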