Example #1
0
def test_full_detailed_tca_calculation():
    """Run a detailed TCA calculation and verify the expected tables come back.

    Checks that trade, sparse market and market DataFrames are all present
    in the output, then checks that requesting a missing ticker raises a
    DataMissingException.
    """
    request = TCARequest(start_date=start_date,
                         finish_date=finish_date,
                         ticker=ticker,
                         trade_data_store=trade_data_store,
                         market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping)

    engine = TCAEngineImpl(version=tcapy_version)

    results = engine.calculate_tca(tca_request=request)

    # All three key tables should be in the output dictionary
    expected_keys = [trade_df_name, 'sparse_market_' + trade_df_name, 'market_df']
    assert all(k in results for k in expected_keys)

    # A ticker with no data should trigger a DataMissingException
    request.ticker = missing_ticker

    raised = False

    try:
        engine.calculate_tca(tca_request=request)
    except DataMissingException:
        raised = True

    assert raised
Example #2
0
def get_sample_data(ticker_spec=None):
    """Run a TCA calculation and return market, trade and order DataFrames.

    Parameters
    ----------
    ticker_spec : str, optional
        Ticker to load; defaults to the module-level ``ticker``.

    Returns
    -------
    tuple
        Market DataFrame plus the two trade/order DataFrames named in
        ``trade_order_list``.
    """
    if ticker_spec is None:
        ticker_spec = ticker

    logger.info("About to load data for " + ticker_spec)

    request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker_spec,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        reporting_currency=reporting_currency,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        tca_type=tca_type,
        benchmark_calcs=BenchmarkMarketMid(),
        use_multithreading=use_multithreading)

    engine = TCAEngineImpl(version=tcapy_version)

    results = engine.calculate_tca(request)

    market_df = results[ticker_spec + "_df"]

    return market_df, results[trade_order_list[0]], results[trade_order_list[1]]
def test_overlapping_full_detailed_tca_calculation():
    """Check a detailed TCA calculation works with caching and overlapping dates.

    Runs the calculation once, then extends the start date back by 10 days
    and reruns, verifying the extended period appears in the sparse
    market/trade output.
    """
    logger = LoggerManager.getLogger(__name__)

    request = TCARequest(start_date=start_date, finish_date=finish_date,
                         ticker=ticker, tca_type='detailed',
                         trade_data_store=trade_data_store,
                         market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping,
                         use_multithreading=True)

    engine = TCAEngineImpl(version=tcapy_version)

    results = engine.calculate_tca(tca_request=request)

    sparse_market_trade_df = results['sparse_market_' + trade_df_name]

    logger.info("Running second TCA calculation, extending dates...")

    # Extend the sample window back by 10 days
    request.start_date = pd.Timestamp(start_date) - timedelta(days=10)

    results = engine.calculate_tca(tca_request=request)

    sparse_market_trade_df = results['sparse_market_' + trade_df_name]

    # The extended run should now contain points before the original window
    assert len(sparse_market_trade_df.index[sparse_market_trade_df.index < '01 Feb 2018']) > 0
Example #4
0
def venue_tca_aggregated_example():
    """Aggregated TCA on one ticker, then PDF of slippage split by venue.

    The probability distribution function of slippage is weighted by
    executed notional and plotted with plotly.
    """
    engine = TCAEngineImpl(version=tca_version)

    request = TCARequest(start_date=start_date,
                         finish_date=finish_date,
                         ticker=ticker,
                         tca_type='aggregated',
                         trade_data_store=trade_data_store,
                         market_data_store=market_data_store,
                         metric_calcs=MetricSlippage())

    results = engine.calculate_tca(request)

    # Probability distribution of slippage by venue, weighted by notional
    pdf_df = ResultsSummary().field_distribution(
        results['trade_df'],
        metric_name='slippage',
        aggregate_by_field='venue',
        pdf_only=True,
        weighting_field='executed_notional')

    # Plot PDF of slippage, split up by venue
    chart_style = Style(plotly_plot_mode='offline_html', connect_line_gaps=True)
    Chart(engine='plotly').plot(pdf_df, style=chart_style)
def example_request_mid_benchmark():
    """Market-only analysis: compute mid, spread-to-mid and resampled data.

    No trade data is involved; the market data is spliced and processed
    day by day in parallel.
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, BenchmarkMarketResampleOffset

    request = get_tca_request()

    # Allow analysis to be done in a parallel way day by day
    # (note: can't do analysis which requires data outside of the day to do this!)
    request.multithreading_params['splice_request_by_dates'] = True

    # Mid -> spread-to-mid -> resample into 1 minute buckets (mean and TWAP)
    mid = BenchmarkMarketMid()
    spread = BenchmarkMarketSpreadToMid()
    resampler = BenchmarkMarketResampleOffset(market_resample_freq='1',
                                              market_resample_unit='min',
                                              price_field='mid',
                                              resample_how=['mean', 'twap'])

    request.benchmark_calcs = [mid, spread, resampler]
    request.use_multithreading = True

    engine = TCAEngineImpl()

    print(engine.calculate_tca(request))
Example #6
0
def test_tag_filter_calculation():
    """Check that trades can be filtered by broker tag and by venue."""

    broker_filter = TradeOrderFilterTag(tag_value_combinations={'broker_id': 'broker1'})

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         trade_data_store=trade_data_store,
                         reporting_currency=reporting_currency,
                         market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping,
                         tca_type=tca_type,
                         trade_order_filter=broker_filter,
                         venue='venue1')

    engine = TCAEngineImpl(version=tcapy_version)

    trade_df = engine.calculate_tca(request)[trade_df_name]

    # Skip the checks entirely if no trade data came back
    if trade_df is None or trade_df.empty:
        return

    # note that this only works with the "test" data - it won't work with real data!
    is_broker1 = trade_df['broker_id'] == 'broker1'
    is_venue1 = trade_df['venue'] == 'venue1'

    # After filtering we should only see broker1 trades executed at venue1
    assert is_broker1.sum() > 0 and (~is_broker1).sum() == 0
    assert is_venue1.sum() > 0 and (~is_venue1).sum() == 0
Example #7
0
def test_data_offset():
    """Check user offsetting of market/trade timestamps by milliseconds.

    Useful if recording clocks for market or trade data are slightly out of
    sync. Market data is shifted by -1ms and trade data by +1ms; the
    shifted indices (and any date columns) are compared with the baseline.
    """
    Mediator.get_volatile_cache().clear_cache()

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         trade_data_store=trade_data_store,
                         market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping)

    engine = TCAEngineImpl(version=tcapy_version)

    baseline = engine.calculate_tca(tca_request=request)

    # Rerun with both the trade and market data shifted
    request.trade_data_offset_ms = 1
    request.market_data_offset_ms = -1

    shifted = engine.calculate_tca(tca_request=request)

    trade_df = baseline[trade_df_name]
    market_df = baseline['market_df']
    trade_df_offset = shifted[trade_df_name]
    market_df_offset = shifted['market_df']

    # Indices should move by exactly the requested offsets
    assert all(market_df.index + timedelta(milliseconds=-1) == market_df_offset.index)
    assert all(trade_df.index + timedelta(milliseconds=1) == trade_df_offset.index)

    # Any date columns present in the trade data should be shifted too
    for c in constants.date_columns:
        if c in trade_df.columns:
            assert all(trade_df[c] + timedelta(milliseconds=1) == trade_df_offset[c])
Example #8
0
def multiple_ticker_tca_aggregated_with_results_example():
    """Aggregated TCA over multiple tickers, wrapped in a TCAResults object.

    Slippage is computed per trade, summarised on an hourly timeline and
    rendered with a candlestick summary display.
    """
    engine = TCAEngineImpl(version=tca_version)

    timeline_form = TimelineResultsForm(metric_name='slippage',
                                        by_date='datehour',
                                        scalar=10000.0)

    # Run a TCA computation for multiple tickers, calculating slippage
    request = TCARequest(start_date=start_date,
                         finish_date=finish_date,
                         ticker=mult_ticker,
                         tca_type='aggregated',
                         trade_data_store=trade_data_store,
                         market_data_store=market_data_store,
                         results_form=[timeline_form],
                         metric_calcs=MetricSlippage(),
                         reporting_currency='EUR',
                         summary_display='candlestick')

    results = engine.calculate_tca(request)

    # Show which objects were produced
    print(results.keys())

    ### Generate TCA report using high level object
    # TCAResults encapsulates the output (easier than a dict of DataFrames)
    tca_results = TCAResults(results, request)
    tca_results.render_computation_charts()

    print(tca_results.sparse_market_charts.keys())
    print(tca_results.sparse_market.keys())
Example #9
0
def test_invalid_tca_inputs():
    """Check exceptions are thrown for invalid TCA inputs.

    Invalid tickers (None, a random string of 6 letters, or one containing
    '/') should cause the engine to raise a ValidationException, and
    malformed start/finish dates should cause TCARequest to raise a
    DateException.
    """

    tca_engine = TCAEngineImpl(version=tcapy_version)

    invalid_tickers = [None, 'KRPAZY', 'EUR/USD']

    # Fix: initialise ONCE before the loop. Previously this list was reset
    # on every iteration, so only the last ticker's outcome survived and
    # the final any(...) assertion ignored the first two tickers.
    ticker_exception_ok = []

    for t in invalid_tickers:
        tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=t)

        try:
            tca_engine.calculate_tca(tca_request)

            # No exception at all for an invalid ticker
            ticker_exception_ok.append(False)
        except Exception as e:
            # Record whether the raised exception was the expected type
            ticker_exception_ok.append(isinstance(e, ValidationException))

    ### Check exception is thrown with TCAEngine if start/finish dates are messed up
    date_exception_ok = []

    try:
        tca_request = TCARequest(start_date='01 Mar19', finish_date='01Oc t20', ticker='EURUSD')

        date_exception_ok.append(False)
    except Exception as e:
        if isinstance(e, DateException):
            date_exception_ok.append(True)

    assert any(ticker_exception_ok) and any(date_exception_ok)
Example #10
0
def get_sample_data():
    """Load sample market, trade and order data via a TCA calculation.

    Depending on ``use_test_csv``, data comes either from bundled test CSV
    files or from the configured trade/market data sources.

    Returns
    -------
    tuple
        (market DataFrame, trade DataFrame, order DataFrame)
    """
    logger.info("About to load data for " + ticker)

    # Parameters common to both data-source configurations
    common = dict(start_date=start_date, finish_date=finish_date, ticker=ticker,
                  reporting_currency='EUR', tca_type=tca_type,
                  benchmark_calcs=BenchmarkMarketMid())

    if use_test_csv:
        request = TCARequest(
            trade_data_store='csv',
            market_data_store=os.path.join(folder, 'small_test_market_df.csv.gz'),
            trade_order_mapping={'trade_df': os.path.join(folder, 'small_test_trade_df.csv'),
                                 'order_df': os.path.join(folder, 'small_test_order_df.csv')},
            **common)
    else:
        request = TCARequest(
            trade_data_store=trade_data_source,
            market_data_store=market_data_store,
            trade_order_mapping=['trade_df', 'order_df'],
            **common)

    results = TCAEngineImpl().calculate_tca(request)

    return results[ticker + '_df'], results['trade_df'], results['order_df']
Example #11
0
def test_full_detailed_tca_calculation(fill_market_trade_databases):
    """Run a detailed TCA calculation and verify the expected tables come back.

    Also checks that a missing ticker makes the engine raise a
    DataMissingException.
    """
    request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        use_multithreading=use_multithreading)

    engine = TCAEngineImpl(version=tcapy_version)

    results = engine.calculate_tca(tca_request=request)

    # The trade, sparse market and market tables should all be present
    for key in ('trade_df', 'sparse_market_trade_df', 'market_df'):
        assert key in results

    # Missing ticker won't return any data, internally a DataMissingException is thrown
    request.ticker = missing_ticker

    raised = False

    try:
        engine.calculate_tca(tca_request=request)
    except DataMissingException:
        raised = True

    assert raised
Example #12
0
def test_create_tca_report(fill_market_trade_databases):
    """Build TCAResults/TCAReport and check the HTML document is generated."""
    Mediator.get_volatile_cache().clear_cache()

    request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        metric_calcs=MetricSlippage(),
        results_form=TimelineResultsForm(metric_name='slippage', by_date='datehour'),
        use_multithreading=use_multithreading)

    engine = TCAEngineImpl(version=tcapy_version)

    results = TCAResults(engine.calculate_tca(tca_request=request), request)
    results.render_computation_charts()

    # Both the timeline data and its rendered charts should exist
    assert results.timeline is not None and results.timeline_charts is not None

    report = TCAReport(results)
    html = report.create_report()

    # Quick check the document was generated by looking for the HTML head tag
    assert '<head>' in html
Example #13
0
def multiple_ticker_tca_aggregated_example():
    """Aggregated TCA on multiple tickers, then bucket slippage with ResultsSummary."""

    engine = TCAEngineImpl(version=tca_version)

    # Run a TCA computation for multiple tickers, calculating slippage
    request = TCARequest(start_date=start_date, finish_date=finish_date,
                         ticker=mult_ticker, tca_type='aggregated',
                         trade_data_store=trade_data_store,
                         market_data_store=market_data_store,
                         metric_calcs=MetricSlippage(), reporting_currency='EUR')

    trade_df = engine.calculate_tca(request)['trade_df']

    # Aggregate some of the results with the ResultsSummary class (we could have done this within the TCARequest)
    summary = ResultsSummary()

    # Average slippage bucketed by ticker
    print(summary.field_bucketing(trade_df, aggregate_by_field='ticker'))

    # Average slippage by venue, weighted by the executed notional in the
    # reporting currency (EUR in this case)
    print(summary.field_bucketing(trade_df, aggregate_by_field='venue',
                                  weighting_field='executed_notional_in_reporting_currency'))

    # Unweighted average slippage by venue
    print(summary.field_bucketing(trade_df, aggregate_by_field='venue'))
Example #14
0
def simplest_tca_single_ticker_example():
    """Detailed TCA on the trades of a single ticker (AUDUSD).

    Calculates a slippage metric for trades/orders, benchmarks for arrival
    price and spread to mid, and collects slippage into a daily timeline
    (scaled to basis points).
    """
    engine = TCAEngineImpl(version=tca_version)

    trade_order_list = ['trade_df', 'order_df']

    # Specify the TCA request
    request = TCARequest(start_date='01 Nov 2017', finish_date='20 Nov 2017', ticker='AUDUSD',
                         tca_type='detailed',
                         trade_data_store='ms_sql_server', market_data_store='arctic-ncfx',
                         metric_calcs=[MetricSlippage(trade_order_list=trade_order_list)],
                         results_form=[TimelineResultsForm(metric_name='slippage', by_date='date', scalar=10000.0)],
                         benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
                         trade_order_mapping=trade_order_list)

    # Dictionary of DataFrames as output from the TCA calculation
    results = engine.calculate_tca(request)

    print(results.keys())

    # Slippage for every trade
    metric_df = results['trade_df']['slippage']

    print(metric_df.head(500))
Example #15
0
def test_time_of_day_filter_calculation():
    """Test we can filter trades by time of day/date.

    After applying the specific-dates filter, every remaining trade should
    fall inside [start_filter_date, finish_filter_date] and none outside.
    """

    trade_order_filter = TradeOrderFilterTimeOfDayWeekMonth(
        specific_dates=filter_date)

    tca_request = TCARequest(start_date=start_date,
                             finish_date=finish_date,
                             ticker=ticker,
                             trade_data_store=trade_data_store,
                             reporting_currency=reporting_currency,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping,
                             tca_type=tca_type,
                             trade_order_filter=trade_order_filter)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    trade_df = trade_order_results_df_dict[trade_df_name]

    if trade_df is not None:
        if not (trade_df.empty):

            match_filtered_date = len(
                trade_df[start_filter_date:finish_filter_date])

            # Fix: count trades OUTSIDE the filter window. The original
            # used '&' (index > finish AND index < start), an impossible
            # condition that was always 0 and made the check vacuous.
            non_filtered_date = len(
                trade_df[(trade_df.index > finish_filter_date)
                         | (trade_df.index < start_filter_date)])

            # Check the filtering worked: trades only inside the window
            assert match_filtered_date > 0 and non_filtered_date == 0
Example #16
0
def example_request_mid_benchmark():
    """Market-only analysis: time-of-day filter, mid, spread-to-mid, resample.

    No trade data is used. Market data is filtered to 15:00-17:00 London
    time, the mid and spread-to-mid are computed, then the series is
    resampled into 1 minute buckets (mean, TWAP and absolute range).
    Results forms aggregate the spread by time of day and the absolute
    range by month/time of day (London), a proxy for volatility.
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, BenchmarkMarketResampleOffset, \
        BenchmarkMarketFilter
    from tcapy.analysis.algos.resultsform import BarResultsForm, TimelineResultsForm

    request = get_tca_request()

    # Allow analysis to be done in a parallel approach day by day
    # (note: can't do analysis which requires data outside of the daily chunks to do this!)
    request.multithreading_params['splice_request_by_dates'] = use_multithreading

    # Restrict market data to 15:00-17:00 London time
    time_filter = BenchmarkMarketFilter(
        time_of_day={'start_time' : "15:00", 'finish_time' : "17:00"},
        time_zone='Europe/London')

    # Resample into 1 minute buckets: mean, TWAP and absolute range
    resampler = BenchmarkMarketResampleOffset(
        market_resample_freq='1', market_resample_unit='min',
        price_field='mid', resample_how=['mean', 'twap', 'absrange'],
        dropna=True)

    request.benchmark_calcs = [time_filter,
                               BenchmarkMarketMid(),
                               BenchmarkMarketSpreadToMid(),
                               resampler]

    # Mean spread to mid for EURUSD by time of day (no weighting field), and
    # mean absrange for EURUSD by time of day (London timezone)/month
    request.results_form = [
        TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='ask_mid_spread',
                            weighting_field=None, by_date='time', scalar=10000.0),
        TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='absrange',
                            weighting_field=None, by_date=['month', 'timeldn'], scalar=10000.0)]

    request.use_multithreading = True

    results = TCAEngineImpl().calculate_tca(request)

    # Print out all keys for all the DataFrames returned
    print(results.keys())

    # Market data snapshots
    print(results['EURUSD_df'])
    print(results['USDJPY_df'])
    print(results['EURUSD_df'].columns)
    print(results['USDJPY_df'].columns)

    # Mean spread by time of day
    print(results['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot mean spread by time of day
    Chart(engine='plotly').plot(results['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot absolute range over each minute, averaged by time of day/month (LDN)
    Chart(engine='plotly').plot(results['timeline_EURUSD_absrange_by/mean_month_timeldn/all'],
                                style=Style(title='EURUSD absolute range by time of day (LDN)', color='Reds', scale_factor=-1))
Example #17
0
def single_ticker_tca_example_1600LDN_benchmark():
    """Detailed TCA on a single ticker, benchmarked against a 16:00 LDN TWAP.

    Computes a TWAP benchmark over a short window around 16:00 London time
    (2 minutes before to 30 seconds after), measures the slippage of
    trades/orders against it, aggregates the daily average slippage in
    basis points, and renders the results into an HTML report.
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    # Removed unused local 'trade_order_type' (assigned but never read)
    trade_order_list = ['trade_df', 'order_df']

    # specify the TCA request
    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        tca_type='detailed',
        dummy_market=False,
        trade_data_store=trade_data_store,
        market_data_store=market_data_store,
        metric_calcs=[  # Calculate the slippage for trades/order
            MetricSlippage(trade_order_list=trade_order_list,
                           bid_benchmark='twap1600LDN',
                           ask_benchmark='twap1600LDN',
                           metric_post_fix='twap1600LDN')
        ],
        results_form=[  # Aggregate the slippage average by date and hour
            TimelineResultsForm(metric_name='slippagetwap1600LDN',
                                by_date='date',
                                scalar=10000.0)
        ],
        benchmark_calcs=[  # At the arrival price for every trade/order
            BenchmarkArrival(),

            # Calculate TWAP over 16:00 LDN
            BenchmarkTWAP(start_time_before_offset={'m': 2},
                          finish_time_after_offset={'s': 30},
                          overwrite_time_of_day='16:00',
                          overwrite_timezone='Europe/London',
                          benchmark_post_fix="1600LDN"),

            # At the spread at the time of every trade/order
            BenchmarkMarketSpreadToMid()
        ],
        extra_lines_to_plot='twap1600LDN',
        trade_order_mapping=trade_order_list,
        use_multithreading=True)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'].head(5))

    tca_results = TCAResults(dict_of_df, tca_request)
    tca_results.render_computation_charts()

    from tcapy.vis.report.computationreport import JinjaRenderer

    tca_report = TCAReport(tca_results, renderer=JinjaRenderer())

    tca_report.create_report(output_filename='test_tca_twap_report.htm',
                             output_format='html',
                             offline_js=False)
def dataframe_tca_example():
    """Detailed TCA on all the trades in a CSV, passed in as a DataFrame.

    Calculates metrics for slippage, transient market impact and permanent
    market impact, plus arrival price and spread-to-mid benchmarks, and
    collects slippage into a daily timeline and a by-venue average.
    """
    PLOT = False

    # clear entire cache
    # Mediator.get_volatile_cache(version='pro').clear_cache()

    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df']

    trade_df = DatabaseSourceCSV(trade_data_database_csv=csv_trade_order_mapping['trade_df']).fetch_trade_order_data()

    data_frame_trade_order_mapping = OrderedDict([('trade_df', trade_df)])

    # Derive the analysis window and ticker universe from the trades themselves
    start_date = trade_df.index[0]
    finish_date = trade_df.index[-1]

    ticker_list = FXConv().correct_unique_notation_list(trade_df['ticker'].unique().tolist())

    # Specify the TCA request
    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker_list,
        tca_type='aggregated', dummy_market=True,
        trade_data_store='dataframe', market_data_store=market_data_store,
        metric_calcs=[
            MetricSlippage(trade_order_list=trade_order_list),
            MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                        trade_order_list=trade_order_list),
            MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                        trade_order_list=trade_order_list)],
        results_form=[
            TimelineResultsForm(metric_name='slippage', by_date='date'),
            BarResultsForm(metric_name='slippage', aggregate_by_field='venue')],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        trade_order_mapping=data_frame_trade_order_mapping,
        use_multithreading=False)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    # Average slippage per day
    timeline_df = dict_of_df['timeline_' + trade_order_type + '_slippage_by_all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    if PLOT:
        from chartpy import Chart, Style

        # Plot slippage by timeline
        Chart(engine='plotly').plot(timeline_df)

        # Plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))
Example #19
0
def tca_example_csv_trade_data_dukascopy():
    """TCA with trade/order data from CSV and market data from Dukascopy.

    No databases are used, which is fine if you rarely run TCA. For heavy
    use we strongly recommend maintaining an internal tick database, as
    external downloading of data can be very slow.

    Here we simply calculate the slippage of every trade and the orders
    above them.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage

    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_engine = TCAEngineImpl(version=constants.tcapy_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))
    ])

    # Note: by specifying use_multithreading=False, we avoid dependencies
    # like Celery.
    #
    # Depending on how the caching is set up, tcapy may try to download
    # market data in monthly/weekly chunks and cache them. To force deletion
    # of the cache you can run:
    #
    # volatile_cache.clear_cache()
    #
    # If you rerun TCA for the same period, the market data is loaded from
    # Redis/in-memory rather than downloaded externally from Dukascopy.
    tca_request = TCARequest(
        start_date='05 May 2017',
        finish_date='10 May 2017',
        ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv',
        market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[TimelineResultsForm(metric_name='slippage',
                                          by_date='datehour',
                                          scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())
def get_sample_data():
    """Load sample market/trade/order data and return the market DataFrame
    for the first ticker.
    """
    logger.info("About to load data for " + ticker[0])

    engine = TCAEngineImpl()

    results = engine.calculate_tca(get_tca_request())

    return results[ticker[0] + '_df']
Example #21
0
def test_invalid_dates_missing_data_tca(fill_market_trade_databases):
    """Tests that a DataMissingException is raised when running TCA with invalid (out of range) dates, and also when
    requesting a valid date range for a ticker which has no data, across every combination of tca_type and
    use_multithreading.

    Note that a running Celery server is needed for use_multithreading to work (as well as the usual SQL and Arctic
    databases running, if the test_csv option has not been selected).
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    ## Test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in use_multithreading:
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            # The calculation MUST raise DataMissingException for these dates; the original version of this
            # test asserted inside the except block (a tautology), so it could never fail when no exception
            # was raised - the flag is now checked after the try/except
            exception_triggered = False

            try:
                tca_engine.calculate_tca(tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            assert exception_triggered

    ## Test a single valid ticker, but missing data (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in use_multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                tca_engine.calculate_tca(tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            assert exception_triggered
Exemple #22
0
def get_sample_data():
    """Run TCA over a fixed period of SQL Server trade data, returning the market data and trade tables."""
    from tcapy.analysis.algos.benchmark import BenchmarkMarketSpreadToMid
    logger.info("About to load data for " + ticker)

    # Fixed two-week window; spread-to-mid benchmark uses the configured bid/ask basis-point offsets
    request = TCARequest(start_date='01 May 2017', finish_date='15 May 2017', ticker=ticker,
                         trade_data_store='ms_sql_server',
                         market_data_store=market_data_store,
                         benchmark_calcs=[BenchmarkMarketSpreadToMid(bid_mid_bp=bid_mid_bp, ask_mid_bp=ask_mid_bp)],
                         trade_order_mapping=['trade_df'], tca_type=tca_type)

    results = TCAEngineImpl().calculate_tca(request)

    return results[ticker + '_df'], results['trade_df']
Exemple #23
0
def compare_multithreading_type():
    """Times identical detailed TCA runs under different multithreading settings and prints each elapsed time."""
    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_list = ['trade_df', 'order_df']

    multithreading_flags = [True, False]

    # Two Celery-based parameter variants, differing only in whether requests are spliced by dates
    param_variants = [
        {'splice_request_by_dates': True, 'cache_period': 'day',
         'cache_period_trade_data': True, 'cache_period_market_data': True,
         'return_cache_handles_trade_data': True, 'return_cache_handles_market_data': True,
         'parallel_library': 'celery'},
        {'splice_request_by_dates': False, 'cache_period': 'day',
         'cache_period_trade_data': True, 'cache_period_market_data': True,
         'return_cache_handles_trade_data': True, 'return_cache_handles_market_data': True,
         'parallel_library': 'celery'}
    ]

    for use_multithreading in multithreading_flags:
        for multithreading_params in param_variants:
            started_at = time.time()

            # Build a fresh detailed TCA request for this configuration
            request = TCARequest(
                start_date=long_start_date, finish_date=long_finish_date, ticker=ticker,
                tca_type='detailed',
                trade_data_store=trade_data_store, market_data_store=market_data_store,
                metric_calcs=[
                    MetricSlippage(trade_order_list=trade_order_list),
                    MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                trade_order_list=trade_order_list),
                    MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                trade_order_list=trade_order_list)],
                results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
                benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
                trade_order_mapping=trade_order_list, use_multithreading=use_multithreading,
                multithreading_params=multithreading_params)

            # Dictionary of dataframes as output from TCA calculation
            dict_of_df = tca_engine.calculate_tca(request)

            elapsed = time.time() - started_at

            print('Multithreading example: calculated ' + str(round(elapsed, 3)) + "s for, use_multithreading = "
                  + str(use_multithreading) + ' multithreading_params = ' + str(multithreading_params))
def test_stress_tca(fill_market_trade_databases):
    """Fires several large TCARequests from parallel threads to stress test the tcapy application, checking that it
    copes with simultaneous requests (note: you may need to reduce the length of the dataset if your machine has
    limited amounts of RAM).

    It can be possible that when deployed on the web, several users might make simultaneous requests. Note, do not use
    pylibmc, and instead use python-memcached, when using memcached as a result backend. pylibmc is not thread-safe so
    will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    if not stress_test:
        return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    base_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=valid_ticker_list,
        dummy_market=True,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        use_multithreading=True,
        tca_type='aggregated')

    # Kick off several simultaneous large TCA requests (each gets its own copy of the request)
    request_no = 2
    tca_request_list = [TCARequest(tca_request=base_request) for _ in range(request_no)]

    tca_engine = TCAEngineImpl(version=tcapy_version)

    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    async_results = [pool.apply_async(tca_engine.calculate_tca, args=(req,)) for req in tca_request_list]

    output = [res.get() for res in async_results]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict.keys()
def test_overlapping_full_detailed_tca_calculation():
    """Tests a detailed TCA calculation works with caching and overlapping dates, checking that it has the right tables returned.
    """
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             tca_type='detailed',
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, use_multithreading=use_multithreading)

    engine = TCAEngineImpl(version=tcapy_version)

    # Push the start date 10 days earlier so the requested range overlaps any previously cached sample
    tca_request.start_date = pd.Timestamp(start_date) - timedelta(days=10)

    results = engine.calculate_tca(tca_request=tca_request)

    sparse = results['sparse_market_trade_df']

    # Some points should fall before 01 Jun 2017, ie. the extended range was actually covered
    assert len(sparse.index[sparse.index < '01 Jun 2017']) > 0
Exemple #26
0
    def __init__(self,
                 app,
                 session_manager,
                 callback_manager,
                 glob_volatile_cache,
                 layout,
                 callback_dict=None):
        """Initialises the caller, wiring up the shared session/callback managers and creating a TCAEngineImpl
        instance for running TCA calculations.

        Parameters
        ----------
        app
            Application object (passed straight through to the superclass)
        session_manager
            Manages per-session state
        callback_manager
            Registers GUI callbacks
        glob_volatile_cache
            Volatile cache shared across sessions
        layout
            GUI layout object
        callback_dict : dict, optional
            Additional callbacks to register
        """
        super(TCACaller, self).__init__(app,
                                        session_manager,
                                        callback_manager,
                                        glob_volatile_cache,
                                        layout,
                                        callback_dict=callback_dict)

        # Maps internal plot tags to the type of GUI component which renders them
        # (the original listed '_dist_trade_order' twice with the same value; the duplicate key is removed)
        self._generic_plot_flags = {
            '_candle_timeline_trade_order': 'candle-timeline-plot',
            '_table_trade_order': 'table',
            '_dist_trade_order': 'dist-plot',
            '_download_link_trade_order': 'download-link',
            '_timeline_trade_order': 'timeline-plot',
            '_bar_trade_order': 'bar-plot',
            '_metric_table_trade_order': 'table'
        }

        self._generic_line_flags = {
            '_candle_timeline_trade_order': [
                'candle-timeline-plot-lines-old',
                'candle-timeline-plot-lines-relayoutData-old'
            ]
        }

        self._plot_flags = self.create_plot_flags(session_manager, layout)

        # Maps the GUI 'reload' selection to a boolean (None => False)
        self._reload_val_dict = {None: False, 'yes': True, 'no': False}

        self._tca_engine = TCAEngineImpl()
Exemple #27
0
class TCACaller(ComputationCaller, ABC):
    """Abstract class which adds listeners to the GUI buttons in the tcapy application for doing TCA _calculations. At
    initialisation it adds listeners for these buttons and links them to the various text box inputs (where the user
    can specify the various TCA parameters such as start date, finish date, ticker, TCA metrics etc.)

    When a button is pressed it triggers various "calculate" methods, which convert the GUI input, into TCARequest objects
    which are then sent to TCAEngine for doing the actual TCA computation. This analysis is then cached in Redis. The
    completion of this calculation will then trigger a callback from every display component (such as a plot or table)
    which search the cache for the appropriate output to display.

    If a user wishes to call tcapy programmatically, it is recommended they create a TCARequest directly, rather
    than attempting to use TCACaller, and then submit that to a TCAEngine.
    """
    def __init__(self,
                 app,
                 session_manager,
                 callback_manager,
                 glob_volatile_cache,
                 layout,
                 callback_dict=None):
        """Initialises the caller, wiring up the shared session/callback managers and creating a TCAEngineImpl
        instance for running TCA calculations.

        Parameters
        ----------
        app
            Application object (passed straight through to the superclass)
        session_manager
            Manages per-session state
        callback_manager
            Registers GUI callbacks
        glob_volatile_cache
            Volatile cache shared across sessions
        layout
            GUI layout object
        callback_dict : dict, optional
            Additional callbacks to register
        """
        super(TCACaller, self).__init__(app,
                                        session_manager,
                                        callback_manager,
                                        glob_volatile_cache,
                                        layout,
                                        callback_dict=callback_dict)

        # Maps internal plot tags to the type of GUI component which renders them
        # (the original listed '_dist_trade_order' twice with the same value; the duplicate key is removed)
        self._generic_plot_flags = {
            '_candle_timeline_trade_order': 'candle-timeline-plot',
            '_table_trade_order': 'table',
            '_dist_trade_order': 'dist-plot',
            '_download_link_trade_order': 'download-link',
            '_timeline_trade_order': 'timeline-plot',
            '_bar_trade_order': 'bar-plot',
            '_metric_table_trade_order': 'table'
        }

        self._generic_line_flags = {
            '_candle_timeline_trade_order': [
                'candle-timeline-plot-lines-old',
                'candle-timeline-plot-lines-relayoutData-old'
            ]
        }

        self._plot_flags = self.create_plot_flags(session_manager, layout)

        # Maps the GUI 'reload' selection to a boolean (None => False)
        self._reload_val_dict = {None: False, 'yes': True, 'no': False}

        self._tca_engine = TCAEngineImpl()

    def fill_computation_request_kwargs(self, kwargs, fields):
        """Fills a dictionary with the appropriate parameters which can be consumed by a TCARequest object. This involves
        a large number of object conversions, eg. str based dates to TimeStamps, metric names to Metric objects etc.

        Parameters
        ----------
        kwargs : dict
            Contains parameters related to TCA analysis

        fields : str(list)
            List of TCA fields we should fill with None if they don't exist in kwargs

        Returns
        -------
        dict
        """

        # Fill the major fields

        kwargs['ticker'] = self._util_func.remove_none_list(kwargs['ticker'])
        kwargs['venue'] = self._util_func.remove_none_list(kwargs['venue'])

        # Convert date strings into TimeStamp formats
        kwargs['start_date'] = pd.Timestamp(
            self._util_func.parse_datetime(str(kwargs['start_date'])))
        kwargs['finish_date'] = pd.Timestamp(
            self._util_func.parse_datetime(str(kwargs['finish_date'])))

        # Any missing or unrecognised 'reload' value defaults to False
        # (previously a bare except; dict.get handles both a missing kwarg and an unknown value explicitly)
        kwargs['reload'] = self._reload_val_dict.get(kwargs.get('reload'), False)

        if 'event_type' not in kwargs.keys():
            kwargs['event_type'] = 'trade'

        if 'market_data' not in kwargs.keys():
            kwargs['market_data'] = constants.default_market_data_store

        # Fill empty fields with None
        for f in fields:
            if f not in kwargs:
                kwargs[f] = None

        # Add a trade filter for time of day
        if kwargs['filter_time_of_day'] is not None:
            if kwargs['filter_time_of_day'] == 'yes':
                if 'start_time_of_day' in kwargs and 'finish_time_of_day' in kwargs:
                    kwargs = self.add_list_kwargs(
                        kwargs, 'trade_order_filter',
                        TradeOrderFilterTimeOfDayWeekMonth(
                            time_of_day={
                                'start_time': kwargs['start_time_of_day'],
                                'finish_time': kwargs['finish_time_of_day']
                            }))

        # Add tag based filters (eg. by broker or algo ID) when the user has selected them
        filter_tags = ['broker', 'algo']
        tag_value_combinations = {}

        for f in filter_tags:
            if kwargs[f] is not None:
                tag_value_combinations[f + '_id'] = kwargs[f]

        if len(tag_value_combinations) > 0:
            kwargs = self.add_list_kwargs(
                kwargs, 'trade_order_filter',
                TradeOrderFilterTag(
                    tag_value_combinations=tag_value_combinations))

        # Add metrics which have been specified (including as strings, which will be added with default parameters)
        if kwargs['metric_calcs'] is not None:
            if not (isinstance(kwargs['metric_calcs'], list)):
                kwargs['metric_calcs'] = [kwargs['metric_calcs']]

            for i in range(0, len(kwargs['metric_calcs'])):
                kwargs['metric_calcs'][i] = self.fill_metrics(
                    kwargs['metric_calcs'][i],
                    kwargs['metric_trade_order_list'], kwargs['event_type'])

        return kwargs

    def fill_metrics(self, metric, metric_trade_order_list, event_type):
        """Converts string describing metrics to the appropriate Metric object (with default parameters), which can later
        be consumed by the TCARequest object.

        Parameters
        ----------
        metric : str or Metric
            Can be a string (eg. 'slippage', 'transient_market_impact', 'permanent_market_impact') or an actual Metric
            object

        metric_trade_order_list : str (list)
            For which trades/orders should this metric be computed for.

        event_type : str
            Trade event type (eg. 'trade', 'cancel', 'cancel/replace' etc)

        Returns
        -------
        Metric
        """
        # If we are given an actual Metric object (or anything else that isn't a string), pass it through unchanged;
        # previously this was handled by a bare except around the whole body, which would also have silently
        # swallowed genuine errors raised by the Metric constructors
        if not isinstance(metric, str):
            return metric

        metric = metric.replace(' ', '_')

        executed_price = 'executed_price'

        # For placements, we wouldn't have an execution price, so closest we can do is the arrival price = mid for trades
        if event_type != 'trade':
            executed_price = 'arrival'

        if metric == 'slippage':
            return MetricSlippage(trade_order_list=metric_trade_order_list,
                                  executed_price=executed_price)
        elif metric == 'transient_market_impact':
            return MetricTransientMarketImpact(
                trade_order_list=metric_trade_order_list,
                executed_price=executed_price)
        elif metric == 'permanent_market_impact':
            return MetricPermanentMarketImpact(
                trade_order_list=metric_trade_order_list,
                executed_price=executed_price)

        ## ADD new metrics you write here (or better to subclass in your version of TCACaller)

        # Unrecognised metric names are returned unchanged, as before
        return metric

    def create_computation_request(self, **kwargs):
        """Creates a TCARequest object, populating its' fields with those from a kwargs dictionary, which consisted of
        parameters such as the start date, finish date, ticker, metrics to be computed, benchmark to be computed etc.

        The TCARequest object can later be consumed by a TCAEngine when it runs a TCA analysis.

        Parameters
        ----------
        kwargs : dict
            For describing a TCA analysis, such as the start date, finish date, ticker etc.

        Returns
        -------
        TCARequest
        """

        # A fully formed TCARequest short-circuits all the conversion logic
        if 'tca_request' in kwargs.keys():
            return kwargs['tca_request']

        # Convert various string/objects into forms which can be accepted by TCARequest
        kwargs = self.fill_computation_request_kwargs(kwargs, [
            'trade_order_mapping', 'trade_order_filter', 'benchmark_calcs',
            'metric_calcs', 'join_tables', 'filter_time_of_day', 'broker',
            'algo', 'dummy_market'
        ])

        # Create a TCARequest object which can be consumed by TCAEngine, to run a TCA calculation
        return TCARequest(start_date=kwargs['start_date'],
                          finish_date=kwargs['finish_date'],
                          ticker=kwargs['ticker'],
                          venue=kwargs['venue'],
                          event_type=kwargs['event_type'],
                          market_data_store=kwargs['market_data'],
                          tca_type=kwargs['tca_type'],
                          reload=kwargs['reload'],
                          trade_order_mapping=kwargs['trade_order_mapping'],
                          trade_order_filter=kwargs['trade_order_filter'],
                          metric_calcs=kwargs['metric_calcs'],
                          benchmark_calcs=kwargs['benchmark_calcs'],
                          join_tables=kwargs['join_tables'],
                          results_form=kwargs['results_form'],
                          dummy_market=kwargs['dummy_market'])

    def run_computation_request(self, tca_request):
        """Kicks off the TCA analysis in the underlying TCAEngine using parameters specified

        Parameters
        ----------
        tca_request : TCARequest
            Governs start date/finish date, _tickers etc. of TCA analysis

        Returns
        -------
        dict
        """
        return self._tca_engine.calculate_tca(tca_request)
Exemple #28
0
def tca_example_csv_trade_data_dukascopy_no_redis():
    """Running TCA calculation but without any Redis caching at all. In practice, this should be avoided, since it will
    likely be much slower, given we'll end up accessing market data/trade data a lot more often from a slow source.

    This is particularly an issue when we're downloading large samples of market data from an external source. For very small
    time periods this might be fine.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage

    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    engine = TCAEngineImpl(version=constants.tcapy_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))
    ])

    # Specify the TCA request (note: by specifying multithreading as False, we avoid dependencies like Celery)

    # Depending on how the caching is setup, tcapy may try to download market data in monthly/weekly chunks and cache
    # them; to force deletion of the cache you can run the below

    # volatile_cache.clear_cache()

    # However if you run TCA for the same period, it will load the market data from Redis/in-memory, rather than
    # downloading it externally from Dukascopy
    request = TCARequest(
        start_date='05 May 2017',
        finish_date='06 May 2017',
        ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv',
        market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[
            TimelineResultsForm(metric_name='slippage',
                                by_date='datehour',
                                scalar=10000.0)
        ],
        benchmark_calcs=[BenchmarkArrival(),
                         BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Switch off every caching/parallelisation feature, so no Redis is needed at all
    request.multithreading_params = {
        'splice_request_by_dates': False,  # True or False
        'cache_period': 'month',  # month or week

        # Cache trade data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_trade_data': False,

        # Cache market data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_market_data': False,

        # Return trade data internally as handles (usually necessary for Celery)
        'return_cache_handles_trade_data': False,

        # Return market data internally as handles (usually necessary for Celery)
        'return_cache_handles_market_data': False,

        # Recommend using Celery, which allows us to reuse Python processes
        'parallel_library': 'single'
    }

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(request)

    print(dict_of_df.keys())

    # Downsample the tick level market data to minute frequency for display
    market_df_minute = dict_of_df['market_df'].resample('1min').last()
    print(market_df_minute)
def test_multithreading_full_basic_tca():
    """Tests if the trade/order and market data is identical for multithreading versus singlethreading for detailed,
    aggregated and compliance. Note that we need a running Celery server for multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample
    """
    Mediator.get_volatile_cache().clear_cache(
    )  # clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=multithreading_start_date,
                             finish_date=multithreading_finish_date,
                             ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['aggregated', 'detailed', 'compliance']

    multithreading = [True, False]

    #### Check the executed prices match between single and multithreaded cases
    for t in tca_type:
        dict_list = []

        for m in multithreading:
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache(
            )  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t
            dict_list.append(tca_engine.calculate_tca(tca_request=tca_request))

        print("tca_request " + t)

        for k in dict_list[0].keys():
            multi_df = dict_list[0][k]
            single_df = dict_list[1][k]

            if isinstance(single_df, pd.DataFrame) and isinstance(
                    multi_df, pd.DataFrame):
                if 'executed_price' in single_df.columns and 'executed_price' in multi_df.columns:

                    print("tablecomputation " + k)

                    exec_multi = multi_df['executed_price'].dropna()
                    exec_single = single_df['executed_price'].dropna()

                    if dump_csv_output:
                        df = pd.DataFrame(exec_multi)

                        df1 = pd.DataFrame(exec_single)
                        df1.columns = [x + '_single' for x in df1.columns]

                        df = df.join(pd.DataFrame(df1), how='outer')

                        df.to_csv(k + "_test.csv")

                        df_large = single_df.join(multi_df,
                                                  lsuffix='_single',
                                                  rsuffix='_multi',
                                                  how='outer')
                        df_large.to_csv(k + "_test_full.csv")

                    # Compare the absolute difference: the original check (exec_multi - exec_single < eps)
                    # would also pass for arbitrarily large NEGATIVE differences
                    assert all((exec_multi - exec_single).abs() < eps)

                    # Only check trade/orders and not any of the other DataFrames returned
                    if 'id' in multi_df.columns and 'id' in single_df.columns:
                        # Check we have unique IDs (we should have unique IDs for every event_type trade (but the id's will
                        # be the same for placements); the original asserted len(id_multi.index) == len(id_multi.index),
                        # a tautology, instead of comparing multi against single
                        id_multi = multi_df['id']
                        id_single = single_df['id']

                        assert len(id_multi.index) == len(id_single.index)

                        assert len(id_multi.unique()) == len(id_single.index)
                        assert len(id_multi.index) == len(id_single.unique())
Exemple #30
0
def single_ticker_tca_example():
    """Example for doing detailed TCA analysis on the trades of a single ticker, calculating metrics for slippage,
    transient market impact & permanent market impact. It also calculates benchmarks for the arrival price of each
    trade and the spread to mid.

    Creates a TCAReport which generates standalone HTML and PDF files.

    Also on a lower level it collects results for slippage into a daily timeline and also average by venue (by default
    weights by reporting currency).
    """

    # Note: running Orca might not work in WSL, also when generating Plotly charts, might get an error with WSL, if
    # it doesn't have silent_display=True, as it will try to open a web page in a browser (which isn't supported in WSL1
    # but is in WSL2)
    PLOT = True

    # clear entire cache
    # Mediator.get_volatile_cache().clear_cache()

    # NOTE(review): other examples in this file construct TCAEngineImpl with version=tcapy_version;
    # confirm tca_version is actually the name defined in this script's setup
    tca_engine = TCAEngineImpl(version=tca_version)

    # Keys under which the trade/order DataFrames appear in the results dictionary
    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df', 'order_df']

    # Ensure orca is started, if want to convert to PDF (sometimes you may need to specify the path)
    # Can be slow to start
    if PLOT:
        from chartpy.engine import EnginePlotly
        EnginePlotly().start_orca()  # constants.orca_server_path)

    # specify the TCA request
    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        tca_type='detailed',
        dummy_market=False,
        trade_data_store=trade_data_store,
        market_data_store=market_data_store,
        metric_calcs=[  # Calculate the slippage for trades/order
            MetricSlippage(trade_order_list=trade_order_list),

            # Calculate the shorter and longer term market impact after every trade/order
            MetricTransientMarketImpact(
                transient_market_impact_gap={'ms': 100},
                trade_order_list=trade_order_list),
            MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                        trade_order_list=trade_order_list)
        ],
        results_form=[  # Aggregate the slippage average by date and hour
            # scalar=10000.0 converts the raw slippage fraction into basis points
            TimelineResultsForm(metric_name='slippage',
                                by_date='datehour',
                                scalar=10000.0),

            # Aggregate the total executed notional in reporting currency (usually USD)
            # for every hour
            TimelineResultsForm(
                metric_name='executed_notional_in_reporting_currency',
                by_date='datehour',
                aggregation_metric='sum',
                scalar=1.0),

            # Aggregate the average slippage on trades by venue
            HeatmapResultsForm(
                metric_name=['slippage', 'transient_market_impact'],
                aggregate_by_field=['venue', 'ticker'],
                scalar=10000.0,
                trade_order_list='trade_df'),

            # Aggregate the average slippage on trades by venue
            BarResultsForm(metric_name='slippage',
                           aggregate_by_field='venue',
                           scalar=10000.0,
                           trade_order_list='trade_df'),

            # Aggregate the average slippage on trades/orders by broker_id
            BarResultsForm(metric_name='slippage',
                           aggregate_by_field='broker_id',
                           scalar=10000.0),

            # Distribution of slippage on trades/orders split by side (buy/sell)
            DistResultsForm(metric_name='slippage',
                            aggregate_by_field='side',
                            scalar=10000.0),

            # Create a scatter chart of slippage vs. executed notional
            ScatterResultsForm(scatter_fields=[
                'slippage', 'executed_notional_in_reporting_currency'
            ],
                               scalar={'slippage': 10000.0})
        ],
        benchmark_calcs=[  # At the arrival price for every trade/order
            BenchmarkArrival(),

            # At the spread at the time of every trade/order
            BenchmarkMarketSpreadToMid()
        ],
        # NOTE(review): a plain list is passed here, whereas other examples in this file pass a
        # trade_order_mapping dict - confirm TCARequest accepts a list for this parameter
        trade_order_mapping=trade_order_list,
        use_multithreading=True)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'])

    print(dict_of_df.keys())

    # Heatmap of slippage and transient market impact broken down by venue and ticker
    # (result keys follow the pattern '<form>_<trade_order_type>_<metrics>_by/<agg>/<fields>')
    heatmap_slippage_market_impact_df = dict_of_df[
        'heatmap_' + trade_order_type +
        '_slippage#transient_market_impact_by/mean/venue#ticker']

    print(heatmap_slippage_market_impact_df)

    # Average slippage per date/hour
    timeline_slippage_df = dict_of_df['timeline_' + trade_order_type +
                                      '_slippage_by/mean_datehour/all']

    # Total executed notional per date/hour
    timeline_executed_notional_df = dict_of_df[
        'timeline_' + trade_order_type +
        '_executed_notional_in_reporting_currency_by/sum_datehour/all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    from tcapy.vis.report.computationreport import JinjaRenderer

    if PLOT:
        ### Generate TCA report using high level object
        # Use higher level TCAResults object to encapsulate results (easier to deal with than a dictionary of DataFrames)
        tca_results = TCAResults(dict_of_df, tca_request)
        tca_results.render_computation_charts()

        tca_report = TCAReport(tca_results, renderer=JinjaRenderer())

        # offline_js=False: presumably the Plotly JS library is referenced rather than
        # embedded inline in the HTML output - confirm against TCAReport docs
        tca_report.create_report(output_filename='test_tca_report.htm',
                                 output_format='html',
                                 offline_js=False)

        # Note needs plotly orca + wkhtmltopdf installed to render PDFs
        try:
            tca_report.create_report(output_filename='test_tca_report.pdf',
                                     output_format='pdf')
        except Exception as e:
            # Best-effort: PDF rendering depends on external tools, so just report the failure
            print(str(e))

        ### Lower level creation of TCA report

        ### Plot charts individually

        # Plot slippage by timeline
        Chart(engine='plotly').plot(timeline_slippage_df)

        # Plot total executed notional by timeline
        Chart(engine='plotly').plot(timeline_executed_notional_df)

        # Plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))