コード例 #1
0
def get_av_uniqueness_from_triple_barrier(triple_barrier_events,
                                          close_series,
                                          num_threads,
                                          verbose=True):
    """
    Orchestrate the derivation of average sample uniqueness for a dataset labeled by the triple barrier method.

    The work is done in two mp_pandas_obj passes over the event index:

    1. num_concurrent_events is evaluated, its result is de-duplicated on the index (the last occurrence is
       kept), aligned to the close price index and missing entries are replaced by 0.
    2. _get_average_uniqueness is evaluated using the concurrency counts from step 1.

    :param triple_barrier_events: (pd.DataFrame) Events from labeling.get_events(). The index and the 't1'
        column (used as label end time) are read.
    :param close_series: (pd.Series) Close prices. Only the index is used by this function.
    :param num_threads: (int) The number of threads concurrently used by the function.
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Frame with a single column 'tW' holding the output of the
        _get_average_uniqueness jobs for each index in triple_barrier_events
    """
    event_starts = triple_barrier_events.index
    event_ends = triple_barrier_events['t1']
    price_index = close_series.index

    # Pass 1: concurrency counts, cleaned up and aligned to the price index.
    concurrency = mp_pandas_obj(num_concurrent_events,
                                ('molecule', event_starts),
                                num_threads,
                                close_series_index=price_index,
                                label_endtime=event_ends,
                                verbose=verbose)
    repeated = concurrency.index.duplicated(keep='last')
    concurrency = concurrency.loc[~repeated]
    concurrency = concurrency.reindex(price_index).fillna(0)

    # Pass 2: average uniqueness per event, stored under the 'tW' column.
    out = pd.DataFrame()
    out['tW'] = mp_pandas_obj(_get_average_uniqueness,
                              ('molecule', event_starts),
                              num_threads,
                              label_endtime=event_ends,
                              num_conc_events=concurrency,
                              verbose=verbose)
    return out
コード例 #2
0
ファイル: attribution.py プロジェクト: zdutta/mlfinlab
def get_weights_by_return(triple_barrier_events, close_series, num_threads=5):
    """
    Advances in Financial Machine Learning, Snippet 4.10(part 2), page 69.

    Determination of Sample Weight by Absolute Return Attribution

    This function is orchestrator for generating sample weights based on return using mp_pandas_obj.
    The input events are validated first, then the number of concurrent events is computed and aligned
    to the close price index, and finally _apply_weight_by_return is evaluated per molecule. The resulting
    weights are rescaled so that they sum to the number of weights.

    :param triple_barrier_events: (pd.DataFrame) Events from labeling.get_events(). Must not contain
        null values, neither in its cells nor in its index.
    :param close_series: (pd.Series) Close prices
    :param num_threads: (int) The number of threads concurrently used by the function.
    :return: (pd.Series) Sample weights based on number return and concurrency
    :raises AssertionError: If triple_barrier_events contains null values or a null index entry.
    """

    has_null_events = bool(triple_barrier_events.isnull().values.any())
    has_null_index = bool(triple_barrier_events.index.isnull().any())
    if has_null_events or has_null_index:
        # Raised explicitly instead of through an `assert` statement so that the check is not stripped
        # when Python runs with -O. The exception type and message are kept for existing callers.
        raise AssertionError('NaN values in triple_barrier_events, delete nans')

    num_conc_events = mp_pandas_obj(num_concurrent_events,
                                    ('molecule', triple_barrier_events.index),
                                    num_threads,
                                    close_series_index=close_series.index,
                                    label_endtime=triple_barrier_events['t1'])
    # Keep the last entry for any repeated index label, then align to the price index (missing -> 0).
    num_conc_events = num_conc_events.loc[~num_conc_events.index.duplicated(
        keep='last')]
    num_conc_events = num_conc_events.reindex(close_series.index).fillna(0)
    weights = mp_pandas_obj(_apply_weight_by_return,
                            ('molecule', triple_barrier_events.index),
                            num_threads,
                            label_endtime=triple_barrier_events['t1'],
                            num_conc_events=num_conc_events,
                            close_series=close_series)
    # Rescale so that the weights add up to the number of samples.
    weights *= weights.shape[0] / weights.sum()
    return weights
コード例 #3
0
def run_ou_tests(spreads_df: pd.DataFrame,
                 combinations: list,
                 test_period: str = '2Y',
                 cross_overs_per_delta: int = 12,
                 num_threads: int = 8,
                 verbose: bool = True) -> pd.DataFrame:
    """
    Multi threading wrapper that feeds the pairs in ``combinations`` to _outer_ou_loop_light.

    All remaining arguments are forwarded unchanged to mp_pandas_obj, and its result is returned as is.

    :param spreads_df: (pd.DataFrame) Spreads Universe
    :param combinations: (list) Tuple list of pairs
    :param test_period: (str) Time delta format, to be used as the time period where the mean crossovers
        will be calculated
    :param cross_overs_per_delta: (int) Crossovers per time delta selected
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Mean reversion statistics on each pair
    """
    pair_jobs = ('molecule', combinations)

    return mp_pandas_obj(
        func=_outer_ou_loop_light,
        pd_obj=pair_jobs,
        spreads_df=spreads_df,
        test_period=test_period,
        cross_overs_per_delta=cross_overs_per_delta,
        num_threads=num_threads,
        verbose=verbose,
    )
コード例 #4
0
def get_sadf(series: pd.Series,
             model: str,
             lags: Union[int, list],
             min_length: int,
             add_const: bool = False,
             num_threads: int = 8) -> pd.Series:
    """
    Multithread implementation of SADF, p. 258-259

    The regression inputs are built with _get_y_x; _sadf_outer_loop is then evaluated through mp_pandas_obj
    for every index label of y from position ``min_length`` onwards.

    :param series: (pd.Series) for which SADF statistics are generated
    :param model: (str) either 'linear', 'quadratic', 'sm_poly_1', 'sm_poly_2', 'sm_exp', 'sm_power'
    :param lags: (int or list) either number of lags to use or array of specified lags
    :param min_length: (int) minimum number of observations needed for estimation
    :param add_const: (bool) flag to add constant
    :param num_threads: (int) number of cores to use
    :return: (pd.Series) of SADF statistics
    """
    regressors, response = _get_y_x(series, model, lags, add_const)

    # The first `min_length` labels are skipped; the rest are the points handed to the outer loop.
    end_points = response.index[min_length:]

    return mp_pandas_obj(
        func=_sadf_outer_loop,
        pd_obj=('molecule', end_points),
        X=regressors,
        y=response,
        min_length=min_length,
        num_threads=num_threads,
    )
コード例 #5
0
def run_extremal_measure_calcs(ranked_returns: pd.DataFrame,
                               quadruples: list,
                               co_variance_matrix: np.array,
                               num_threads: int = 8,
                               verbose: bool = True) -> pd.DataFrame:
    """
    Multi threading wrapper that feeds the quadruples to _extremal_measure_loop and picks the best one.

    The jobs are dispatched through mp_pandas_obj; the row located at the position of the largest value in
    the 'result' column is returned.

    NOTE(review): ``.iloc`` with a single integer position usually yields a pd.Series rather than a
    pd.DataFrame - confirm against the object returned by mp_pandas_obj.

    :param ranked_returns: (pd.DataFrame) ranked returns
    :param quadruples: (list)  list of quadruples
    :param co_variance_matrix: (np.array) Covariance Matrix
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Quadruple with biggest extremal measure
    """
    measures = mp_pandas_obj(
        func=_extremal_measure_loop,
        pd_obj=('molecule', quadruples),
        ranked_returns=ranked_returns,
        co_variance_matrix=co_variance_matrix,
        num_threads=num_threads,
        verbose=verbose,
    )

    # Position (not label) of the largest measure.
    best_position = measures['result'].argmax()
    return measures.iloc[best_position]
コード例 #6
0
ファイル: sadf.py プロジェクト: bobcolner/pandas-polygon
def get_sadf(series: pd.Series,
             model: str,
             lags: Union[int, list],
             min_length: int,
             add_const: bool = False,
             phi: float = 0,
             num_threads: int = 8,
             verbose: bool = True) -> pd.Series:
    """
    Advances in Financial Machine Learning, p. 258-259.

    Multithread implementation of SADF

    SADF fits the ADF regression at each end point t with backwards expanding start points. For the
    estimation of SADF(t), the right side of the window is fixed at t. SADF recursively expands the beginning
    of the sample up to t - min_length, and returns the sup of this set.

    With a sub- or super-martingale test, the variance of beta of a weak long-run bubble may be smaller than
    that of a strong short-run bubble, which biases the method towards long-run bubbles. To correct for this
    bias, the ADF statistic in samples with large lengths can be penalized with the coefficient phi in [0, 1]
    such that:

    ADF_penalized = ADF / (sample_length ^ phi)

    In this function the regression inputs are built with _get_y_x and _sadf_outer_loop is evaluated through
    mp_pandas_obj for every index label of y from position ``min_length`` onwards.

    :param series: (pd.Series) Series for which SADF statistics are generated
    :param model: (str) Either 'linear', 'quadratic', 'sm_poly_1', 'sm_poly_2', 'sm_exp', 'sm_power'
    :param lags: (int or list) Either number of lags to use or array of specified lags
    :param min_length: (int) Minimum number of observations needed for estimation
    :param add_const: (bool) Flag to add constant
    :param phi: (float) Coefficient to penalize large sample lengths when computing SMT, in [0, 1]
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.Series) SADF statistics
    """
    regressors, response = _get_y_x(series, model, lags, add_const)

    # The first `min_length` labels are skipped; the rest are the points handed to the outer loop.
    end_points = response.index[min_length:]

    return mp_pandas_obj(
        func=_sadf_outer_loop,
        pd_obj=('molecule', end_points),
        X=regressors,
        y=response,
        min_length=min_length,
        model=model,
        phi=phi,
        num_threads=num_threads,
        verbose=verbose,
    )
コード例 #7
0
def avg_active_signals(signals, num_threads=1):
    """
    SNIPPET 10.2 - BETS ARE AVERAGED AS LONG AS THEY ARE STILL ACTIVE
    Averages the bet sizes of all concurrently active bets. The averaging itself is delegated to
    mp_avg_active_signals, dispatched through mp_pandas_obj.

    :param signals: (pandas.DataFrame) Contains at least the following columns:
     'signal' - the bet size
     't1' - the closing time of the bet
     And the index must be datetime format.
    :param num_threads: (int) Number of threads to use in multiprocessing, default value is 1.
    :return: (pandas.Series) The averaged bet sizes.
    """
    # 1) Time points where signals change: every non-null bet end time plus every bet start time
    #    (the index), de-duplicated and in ascending order.
    bet_end_times = set(signals['t1'].dropna().to_numpy())
    change_points = sorted(bet_end_times.union(signals.index.to_numpy()))

    return mp_pandas_obj(mp_avg_active_signals, ('molecule', change_points), num_threads, signals=signals)
コード例 #8
0
def run_extended_correlation_calcs(u: pd.DataFrame,
                                   quadruples: list,
                                   num_threads: int = 8,
                                   verbose: bool = True) -> pd.DataFrame:
    """
    Multi threading wrapper that feeds the quadruples to _extended_correlation_loop and picks the best one.

    The jobs are dispatched through mp_pandas_obj (``u`` is forwarded as ``u_matrix``); the row located at
    the position of the largest value in the 'result' column is returned.

    NOTE(review): ``.iloc`` with a single integer position usually yields a pd.Series rather than a
    pd.DataFrame - confirm against the object returned by mp_pandas_obj.

    :param u: (pd.DataFrame) ranked returns
    :param quadruples: (list)  list of quadruples
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Quadruple with highest multivariate correlation
    """
    correlations = mp_pandas_obj(
        func=_extended_correlation_loop,
        pd_obj=('molecule', quadruples),
        u_matrix=u,
        num_threads=num_threads,
        verbose=verbose,
    )

    # Position (not label) of the largest correlation measure.
    best_position = correlations['result'].argmax()
    return correlations.iloc[best_position]
コード例 #9
0
def run_diagonal_measure_calcs(ranked_returns: pd.DataFrame,
                               quadruples: list,
                               num_threads: int = 8,
                               verbose: bool = True) -> pd.DataFrame:
    """
    Multi threading wrapper that feeds the quadruples to _diagonal_measure_loop and picks the best one.

    The jobs are dispatched through mp_pandas_obj; the row located at the position of the smallest value in
    the 'result' column is returned.

    NOTE(review): ``.iloc`` with a single integer position usually yields a pd.Series rather than a
    pd.DataFrame - confirm against the object returned by mp_pandas_obj.

    :param ranked_returns: (pd.DataFrame) ranked returns
    :param quadruples: (list)  list of quadruples
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Quadruple with smallest diagonal measure
    """
    measures = mp_pandas_obj(
        func=_diagonal_measure_loop,
        pd_obj=('molecule', quadruples),
        ranked_returns=ranked_returns,
        num_threads=num_threads,
        verbose=verbose,
    )

    # Position (not label) of the smallest measure.
    best_position = measures['result'].argmin()
    return measures.iloc[best_position]
コード例 #10
0
def run_cointegration_tests(prices_df: pd.DataFrame,
                            combinations: list,
                            num_threads: int = 8,
                            verbose: bool = True) -> pd.DataFrame:
    """
    Multi threading wrapper that feeds the pairs in ``combinations`` to _outer_cointegration_loop.

    The jobs are dispatched through mp_pandas_obj and the collected statistics are returned sorted by the
    'pvalue' column in ascending order.

    :param prices_df: (pd.DataFrame) Price Universe
    :param combinations: (list) Tuple list of pairs
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Cointegration statistics on each pair
    """
    pair_stats = mp_pandas_obj(
        func=_outer_cointegration_loop,
        pd_obj=('molecule', combinations),
        prices_df=prices_df,
        num_threads=num_threads,
        verbose=verbose,
    )

    # Smallest p-values first.
    sort_columns = ['pvalue']
    return pair_stats.sort_values(sort_columns, ascending=True)
コード例 #11
0
def run_traditional_correlation_calcs(corr_matrix: pd.DataFrame,
                                      quadruples: list,
                                      num_threads: int = 8,
                                      verbose: bool = True) -> pd.DataFrame:
    """
    Multi threading wrapper that feeds the quadruples to _traditional_correlation_loop and picks the best one.

    The jobs are dispatched through mp_pandas_obj with ``mp_batches=10``; the row located at the position of
    the largest value in the 'result' column is returned.

    NOTE(review): ``.iloc`` with a single integer position usually yields a pd.Series rather than a
    pd.DataFrame - confirm against the object returned by mp_pandas_obj.

    :param corr_matrix: (pd.DataFrame) Correlation Matrix
    :param quadruples: (list)  list of quadruples
    :param num_threads: (int) Number of cores to use
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Quadruple with highest sum of correlations
    """
    correlation_sums = mp_pandas_obj(
        func=_traditional_correlation_loop,
        pd_obj=('molecule', quadruples),
        corr_matrix=corr_matrix,
        num_threads=num_threads,
        mp_batches=10,
        verbose=verbose,
    )

    # Position (not label) of the largest sum of correlations.
    best_position = correlation_sums['result'].argmax()
    return correlation_sums.iloc[best_position]
コード例 #12
0
def get_events(close, t_events, pt_sl, target, min_ret, num_threads, vertical_barrier_times=False,
               side_prediction=None, verbose=True):
    """
    Advances in Financial Machine Learning, Snippet 3.6 page 50.

    Getting the Time of the First Touch, with Meta Labels

    Orchestrator used to meta-label the data, in conjunction with the Triple Barrier Method. The targets are
    aligned to the event timestamps and filtered by ``min_ret``, an events frame is assembled, and
    apply_pt_sl_on_t1 is evaluated through mp_pandas_obj to find, for every event, the earliest barrier touch.

    :param close: (pd.Series) Close prices
    :param t_events: (pd.Series) of t_events. These are timestamps that will seed every triple barrier.
        These are the timestamps selected by the sampling procedures discussed in Chapter 2, Section 2.5.
        Eg: CUSUM Filter
    :param pt_sl: (2 element array) Element 0, indicates the profit taking level; Element 1 is stop loss level.
        A non-negative float that sets the width of the two barriers. A 0 value means that the respective
        horizontal barrier (profit taking and/or stop loss) will be disabled.
        When ``side_prediction`` is None, element 0 is used for both horizontal barriers in the search.
    :param target: (pd.Series) of values that are used (in conjunction with pt_sl) to determine the width
        of the barrier. In this program this is daily volatility series.
    :param min_ret: (float) The minimum target return required for running a triple barrier search.
    :param num_threads: (int) The number of threads concurrently used by the function.
    :param vertical_barrier_times: (pd.Series) A pandas series with the timestamps of the vertical barriers.
        We pass a False when we want to disable vertical barriers.
    :param side_prediction: (pd.Series) Side of the bet (long/short) as decided by the primary model
    :param verbose: (bool) Flag to report progress on asynch jobs
    :return: (pd.DataFrame) Events
            -events.index is event's starttime
            -events['t1'] is event's endtime
            -events['trgt'] is event's target
            -events['side'] (optional) implies the algo's position side
            -events['pt'] is profit taking multiple
            -events['sl']  is stop loss multiple
    """

    # 1) Align the target to the event timestamps and keep only values above the minimum return
    aligned_target = target.reindex(t_events)
    target = aligned_target[aligned_target > min_ret]

    # 2) Vertical barrier (max holding period); an all-NaT series stands in when it is disabled
    if vertical_barrier_times is False:
        vertical_barrier_times = pd.Series(pd.NaT, index=t_events, dtype=t_events.dtype)

    # 3) Side of each bet and the horizontal barrier multiples used for the search
    if side_prediction is not None:
        # Subset side_prediction on target index.
        bet_side = side_prediction.reindex(target.index)
        search_multiples = pt_sl[:2]
    else:
        # No primary model: every bet is treated as long and both barriers use the profit taking level.
        bet_side = pd.Series(1.0, index=target.index)
        search_multiples = [pt_sl[0], pt_sl[0]]

    # Assemble [v_barrier, target, side] and drop the rows that have no target
    columns = {'t1': vertical_barrier_times, 'trgt': target, 'side': bet_side}
    events = pd.concat(columns, axis=1)
    events = events.dropna(subset=['trgt'])

    # Apply Triple Barrier
    touch_times = mp_pandas_obj(func=apply_pt_sl_on_t1,
                                pd_obj=('molecule', events.index),
                                num_threads=num_threads,
                                close=close,
                                events=events,
                                pt_sl=search_multiples,
                                verbose=verbose)

    # The event ends at the earliest non-null touch among the barriers
    for start_time in events.index:
        events.at[start_time, 't1'] = touch_times.loc[start_time, :].dropna().min()

    if side_prediction is None:
        events = events.drop(columns='side')

    # Add profit taking and stop loss multiples for vertical barrier calculations
    events['pt'] = pt_sl[0]
    events['sl'] = pt_sl[1]

    return events
コード例 #13
0
ファイル: labeling.py プロジェクト: didw/ml_finance
def get_events(close,
               t_events,
               pt_sl,
               target,
               min_ret,
               num_threads,
               vertical_barrier_times=False,
               side_prediction=None):
    """
    Snippet 3.6 page 50, Getting the Time of the First Touch, with Meta Labels

    This function is orchestrator to meta-label the data, in conjunction with the Triple Barrier Method.
    The targets are aligned to the event timestamps and filtered by ``min_ret``, an events frame is
    assembled, and apply_pt_sl_on_t1 is evaluated through mp_pandas_obj to find the earliest barrier touch
    of every event.

    :param close: (series) Close prices
    :param t_events: (series) of t_events. These are timestamps that will seed every triple barrier.
        These are the timestamps selected by the sampling procedures discussed in Chapter 2, Section 2.5.
        Eg: CUSUM Filter
        Timestamps that have no entry in ``target`` are dropped.
    :param pt_sl: (2 element array) element 0, indicates the profit taking level; element 1 is stop loss level.
        A non-negative float that sets the width of the two barriers. A 0 value means that the respective
        horizontal barrier (profit taking and/or stop loss) will be disabled.
        When ``side_prediction`` is None, element 0 is used for both horizontal barriers.
    :param target: (series) of values that are used (in conjunction with pt_sl) to determine the width
        of the barrier. In this program this is daily volatility series.
    :param min_ret: (float) The minimum target return required for running a triple barrier search.
    :param num_threads: (int) The number of threads concurrently used by the function.
    :param vertical_barrier_times: (series) A pandas series with the timestamps of the vertical barriers.
        We pass a False when we want to disable vertical barriers.
    :param side_prediction: (series) Side of the bet (long/short) as decided by the primary model
    :return: (data frame) of events
            -events.index is event's starttime
            -events['t1'] is event's endtime
            -events['trgt'] is event's target
            -events['side'] (optional) implies the algo's position side
    """

    # 1) Get target
    # reindex (instead of .loc) so that event timestamps absent from `target` become NaN rather than
    # raising KeyError on recent pandas versions; the NaN rows are removed by the comparison below.
    target = target.reindex(t_events)
    target = target[target > min_ret]  # min_ret

    # 2) Get vertical barrier (max holding period)
    if vertical_barrier_times is False:
        vertical_barrier_times = pd.Series(pd.NaT, index=t_events)

    # 3) Form events object, apply stop loss on vertical barrier
    if side_prediction is None:
        # No primary model: every bet is treated as long and both barriers use the profit taking level.
        side_ = pd.Series(1., index=target.index)
        pt_sl_ = [pt_sl[0], pt_sl[0]]
    else:
        # Subset side_prediction on the target index; missing labels yield NaN instead of KeyError.
        side_ = side_prediction.reindex(target.index)
        pt_sl_ = pt_sl[:2]

    # Create a new df with [v_barrier, target, side] and drop rows that are NA in target
    events = pd.concat(
        {
            't1': vertical_barrier_times,
            'trgt': target,
            'side': side_
        }, axis=1)
    events = events.dropna(subset=['trgt'])

    # Apply Triple Barrier
    df0 = mp_pandas_obj(func=apply_pt_sl_on_t1,
                        pd_obj=('molecule', events.index),
                        num_threads=num_threads,
                        close=close,
                        events=events,
                        pt_sl=pt_sl_)

    # Earliest touch across the barriers for each event
    events['t1'] = df0.dropna(how='all').min(axis=1)  # pd.min ignores nan

    if side_prediction is None:
        events = events.drop('side', axis=1)

    return events