def example_calculate_spread_to_mid_benchmark():
    """Example on how to add spread to the benchmark market data
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketSpreadToMid

    market_df = get_sample_data()

    benchmark_spread_to_mid = BenchmarkMarketSpreadToMid()
    market_df = benchmark_spread_to_mid.calculate_benchmark(market_df=market_df)

    print(market_df)
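
In practice the spread-to-mid calculation usually follows a mid calculation on the same market data. Below is a minimal sketch of that ordering, assuming get_sample_data returns a market DataFrame with bid/ask quotes and that BenchmarkMarketMid shares the calculate_benchmark(market_df=...) interface shown above; the extra columns it prints are not guaranteed.

def example_calculate_mid_then_spread_benchmark():
    """Sketch: add a mid price first, then express bid/ask as a spread to that mid
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid

    market_df = get_sample_data()

    # Add a 'mid' field, then the spread to mid fields on top of it
    market_df = BenchmarkMarketMid().calculate_benchmark(market_df=market_df)
    market_df = BenchmarkMarketSpreadToMid().calculate_benchmark(market_df=market_df)

    # Inspect which columns were added (names other than those used elsewhere in these examples are assumptions)
    print(market_df.columns)
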
def example_request_mid_benchmark():
    """Example of how to do a calculation to do market analysis to calculate mid, resample etc. without any trade data

    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, BenchmarkMarketResampleOffset
    from tcapy.analysis.tcaengine import TCAEngineImpl

    # get_tca_request() is assumed to be a helper defined elsewhere in the script
    tca_request = get_tca_request()

    # Allow analysis to be done in parallel, day by day
    # (note: we can't do analysis which requires data outside of the day in this mode!)
    tca_request.multithreading_params['splice_request_by_dates'] = True

    # We'll calculate the market mid, then the spread to that mid, and finally resample the data into 1 minute
    # bars, taking the mean of each minute (and the TWAP)
    tca_request.benchmark_calcs = [
        BenchmarkMarketMid(),
        BenchmarkMarketSpreadToMid(),
        BenchmarkMarketResampleOffset(market_resample_freq='1',
                                      market_resample_unit='min',
                                      price_field='mid',
                                      resample_how=['mean', 'twap'])
    ]
    tca_request.use_multithreading = True

    tca_engine = TCAEngineImpl()

    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df)
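
Once calculate_tca returns, the resampled market data can be inspected directly from the dictionary. A minimal usage sketch, assuming the market data for each ticker comes back under a '<ticker>_df' key as in the next example (the exact keys depend on the request):

def example_inspect_resampled_market_data(dict_of_df):
    """Sketch: pull the (resampled) market data out of the TCA output dictionary
    """
    # 'EURUSD_df' style keys hold the per ticker market data after the benchmark calcs have run
    if 'EURUSD_df' in dict_of_df:
        market_df = dict_of_df['EURUSD_df']

        print(market_df.head())
        print(market_df.columns)
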
Example #3
def example_request_mid_benchmark():
    """Example of how to do a calculation to do market analysis to calculate mid, resample etc. without any trade data

    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, BenchmarkMarketResampleOffset, \
        BenchmarkMarketFilter
    from tcapy.analysis.algos.resultsform import BarResultsForm, TimelineResultsForm
    from tcapy.analysis.tcaengine import TCAEngineImpl

    # chartpy is assumed here for the Chart/Style plotting calls below
    from chartpy import Chart, Style

    tca_request = get_tca_request()

    # Allow analysis to be done in parallel, day by day
    # (note: we can't do analysis which requires data outside of the daily chunks in this mode!)
    tca_request.multithreading_params['splice_request_by_dates'] = True

    # Filter market data by time of day between 15:00-17:00 LDN
    # Then calculate the market mid, then calculate the spread to the mid,
    # Then resample the data into 1 minute bars, taking the mean of each minute (and the TWAP) and calculating the absolute range
    tca_request.benchmark_calcs = [BenchmarkMarketFilter(time_of_day={'start_time' : "15:00", 'finish_time' : "17:00"},
                                                         time_zone='Europe/London'),
                                   BenchmarkMarketMid(), BenchmarkMarketSpreadToMid(),
                                   BenchmarkMarketResampleOffset(market_resample_freq='1', market_resample_unit='min',
                                        price_field='mid', resample_how=['mean', 'twap', 'absrange'], dropna=True),
                                   ]

    # Calculate the mean spread to mid for EURUSD by time of day during our sample (do not weight by any other field)
    # Calculate the mean absrange for EURUSD by time of day (London timezone)/month of the year (ie. a proxy for volatility)
    tca_request.results_form = \
        [TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='ask_mid_spread',
                             weighting_field=None, by_date='time', scalar=10000.0),
         TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='absrange',
                             weighting_field=None, by_date=['month', 'timeldn'], scalar=10000.0)
        ]

    tca_request.use_multithreading = True

    tca_engine = TCAEngineImpl()

    dict_of_df = tca_engine.calculate_tca(tca_request)

    # Print out all keys for all the DataFrames returned
    print(dict_of_df.keys())

    # Print market data snapshots
    print(dict_of_df['EURUSD_df'])
    print(dict_of_df['USDJPY_df'])
    print(dict_of_df['EURUSD_df'].columns)
    print(dict_of_df['USDJPY_df'].columns)

    # Print out mean spread by time of day
    print(dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot mean spread by time of day
    Chart(engine='plotly').plot(dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot the absolute range over each minute, averaged by time of day and month of the year
    Chart(engine='plotly').plot(dict_of_df['timeline_EURUSD_absrange_by/mean_month_timeldn/all'],
                                style=Style(title='EURUSD absolute range by time of day (LDN)', color='Reds', scale_factor=-1))
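
The timeline DataFrames above are ordinary pandas objects, so they can be exported as well as plotted. A minimal sketch reusing the keys printed above; the CSV filenames are purely illustrative.

def example_export_timelines_to_csv(dict_of_df):
    """Sketch: write the timeline results out as CSV files
    """
    # Mean spread to mid by time of day
    dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'].to_csv('eurusd_spread_by_time_of_day.csv')

    # Absolute range by month/time of day (London timezone)
    dict_of_df['timeline_EURUSD_absrange_by/mean_month_timeldn/all'].to_csv('eurusd_absrange_by_month_timeldn.csv')
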
Example #4
def tca_example_csv_trade_data_dukascopy():
    """Loads up trade/order data from CSV files and market data externally from Dukascopy. Does not use any databases, if
    you rarely use TCA, this is fine. However, for heavy use of TCA, we strongly recommend maintaining an internal tick
    database, as external downloading of data can be very slow.

    In this case we are simply calculating the slippage of every trade and orders above them.
    """

    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkMarketSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage

    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version
    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))
    ])

    # Specify the TCA request (note: by setting use_multithreading=False, we avoid dependencies like Celery)

    # Depending on how the caching is set up, tcapy may try to download market data in monthly/weekly chunks and cache it.
    # To force deletion of the cache you can run the line below

    # volatile_cache.clear_cache()

    # However, if you rerun TCA for the same period, it will load the market data from Redis/in-memory, rather than
    # downloading it externally from Dukascopy again
    tca_request = TCARequest(
        start_date='05 May 2017',
        finish_date='10 May 2017',
        ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv',
        market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[
            TimelineResultsForm(metric_name='slippage',
                                by_date='datehour',
                                scalar=10000.0)
        ],
        benchmark_calcs=[BenchmarkArrival(),
                         BenchmarkMarketSpreadToMid()],
        use_multithreading=False)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())
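
Beyond the aggregated timeline, trade level slippage can be inspected from the same dictionary. A minimal sketch, assuming the 'trade_df' key from the trade_order_mapping above and that MetricSlippage adds a 'slippage' column (the column name is inferred from the metric_name used in the results form):

def example_inspect_trade_slippage(dict_of_df):
    """Sketch: look at the slippage attached to individual trades
    """
    # Trade level results, keyed by the names used in csv_trade_order_mapping
    trade_df = dict_of_df['trade_df']

    print(trade_df.head())

    # 'slippage' is an assumed column name, based on metric_name='slippage' above
    if 'slippage' in trade_df.columns:
        print(trade_df['slippage'].describe())
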
Example #5
def get_sample_data():
    from tcapy.analysis.algos.benchmark import BenchmarkMarketSpreadToMid
    from tcapy.analysis.tcarequest import TCARequest
    from tcapy.analysis.tcaengine import TCAEngineImpl

    # Note: ticker, market_data_store, bid_mid_bp, ask_mid_bp, tca_type and logger are assumed to be
    # defined at module level in the original script
    logger.info("About to load data for " + ticker)

    tca_request = TCARequest(start_date='01 May 2017', finish_date='15 May 2017', ticker=ticker, trade_data_store='ms_sql_server',
                             market_data_store=market_data_store,
                             benchmark_calcs=[BenchmarkMarketSpreadToMid(bid_mid_bp=bid_mid_bp, ask_mid_bp=ask_mid_bp)],
                             trade_order_mapping=['trade_df'], tca_type=tca_type)

    tca_engine = TCAEngineImpl()

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)
    trade_df = trade_order_results_df_dict['trade_df']

    return trade_order_results_df_dict[ticker + '_df'], trade_df
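
A minimal usage sketch for the helper above, assuming the module level variables it references (ticker etc.) have been defined:

# get_sample_data returns the market data for the ticker alongside the trade data, with spread to mid fields attached
market_df, trade_df = get_sample_data()

print(market_df.head())
print(trade_df.head())
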
Example #6
def tca_example_csv_trade_data_dukascopy_no_redis():
    """Running TCA calculation but without any Redis caching at all. In practice, this should be avoided, since it will
    likely be much slower, given we'll end up accessing market data/trade data a lot more often from a slow source.

    This is particularly an issue when we're downloading large samples of market data from an external source. For very small
    time periods this might be fine.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkMarketSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage

    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version
    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
                                           ('order_df', os.path.join(folder, 'small_test_order_df.csv'))])

    # Specify the TCA request (note: by setting use_multithreading=False, we avoid dependencies like Celery)

    # Depending on how the caching is set up, tcapy may try to download market data in monthly/weekly chunks and cache it.
    # To force deletion of the cache you can run the line below

    # volatile_cache.clear_cache()

    # In this example we switch off the periodic Redis caching of trade/market data below, so repeated runs are
    # likely to download the market data externally from Dukascopy each time
    tca_request = TCARequest(start_date='05 May 2017', finish_date='06 May 2017', ticker=['EURUSD'],
                             tca_type='detailed',
                             trade_data_store='csv', market_data_store='dukascopy',
                             trade_order_mapping=csv_trade_order_mapping,
                             metric_calcs=[MetricSlippage()],
                             results_form=[
                                 TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
                             benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
                             use_multithreading=False)

    tca_request.multithreading_params = {'splice_request_by_dates': False,  # True or False
                                         'cache_period': 'month',  # month or week

                                         # Cache trade data in monthly/periodic chunks in Redis (reduces database calls a lot)
                                         'cache_period_trade_data': False,

                                         # Cache market data in monthly/periodic chunks in Redis (reduces database calls a lot)
                                         'cache_period_market_data': False,

                                         # Return trade data internally as handles (usually necessary for Celery)
                                         'return_cache_handles_trade_data': False,

                                         # Return market data internally as handles (usually necessary for Celery)
                                         'return_cache_handles_market_data': False,

                                         # In production Celery is recommended (it reuses Python processes); here we run in a single process
                                         'parallel_library': 'single'
                                         }

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    market_df = dict_of_df['market_df']

    market_df_minute = market_df.resample('1min').last()
    print(market_df_minute)
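
The raw market DataFrame can also be reshaped with plain pandas beyond the last-tick resample above. A minimal sketch building 1 minute OHLC bars, assuming bid/ask (and possibly mid) columns exist in the Dukascopy data:

def example_resample_market_data_ohlc(market_df):
    """Sketch: build 1 minute open/high/low/close bars from the tick data
    """
    # Keep only the quote columns that are actually present (the column names are assumptions)
    cols = [c for c in ['bid', 'ask', 'mid'] if c in market_df.columns]

    market_df_ohlc = market_df[cols].resample('1min').ohlc()

    print(market_df_ohlc.head())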