Example #1
def fill_market_trade_databases():
    """Fills market and trade data with test data
    """
    Mediator.get_volatile_cache().clear_cache()

    replace_append = 'replace'

    # Fill market data (assume: CHUNK_STORE as our default format!)
    for ticker in ticker_arctic:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=arctic_lib_type)

        # Write CSV to Arctic
        database_source.convert_csv_to_table(csv_market_data_store,
                                             ticker,
                                             market_data_table,
                                             if_exists_table=replace_append,
                                             if_exists_ticker='replace',
                                             market_trade_data='market',
                                             remove_duplicates=False)

        replace_append = 'append'

    # Fill trade/order data
    database_source = DatabaseSourceMySQL()

    for t in trade_order_list:
        # Dump trade_df to SQL test harness database and overwrite
        database_source.convert_csv_to_table(
            csv_trade_order_mapping[t],
            None, (trade_order_mapping[trade_data_store])[t],
            database_name=trade_data_database_name,
            if_exists_table='replace',
            market_trade_data='trade')
Example #2
    def __init__(self, tables_dict={}, scalar=1, round_figures_by=None):
        self._tables_dict = tables_dict
        self._scalar = scalar
        self._round_figures_by = round_figures_by

        self._time_series_ops = Mediator.get_time_series_ops()
        self._util_func = Mediator.get_util_func()
Example #3
def test_executed_price_notional_calculation(fill_market_trade_databases):
    """Test that the executed average price calculation from trades is correctly reflected in the order level
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    # get the first and last points given boundary cases (and a few other random orders) to check
    index_boundary = np.random.randint(0, len(order_df.index) - 1, 100)
    index_boundary = index_boundary.tolist()
    index_boundary.append(0)
    index_boundary.append(-1)

    for i in index_boundary:
        # order_df.ix[i, 'notional']
        if order_df.iloc[i]['notional'] > 1:
            # executed_price = order_df.ix[i, 'executed_price']
            # id = order_df.ix[i, 'id']
            executed_price = order_df.iloc[i]['executed_price']
            id = order_df.iloc[i]['id']

            executed_price_trade = trade_df[trade_df['ancestor_pointer_id'] ==
                                            id]['executed_price'].fillna(0)
            executed_notional_trade = trade_df[
                trade_df['ancestor_pointer_id'] ==
                id]['executed_notional'].fillna(0)

            executed_avg_trade = (
                (executed_price_trade * executed_notional_trade).sum() /
                executed_notional_trade.sum())

            assert abs(executed_price - executed_avg_trade) < eps
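
A minimal standalone sketch (plain pandas with synthetic numbers, not tcapy's API) of the notional-weighted average price check performed above: the order's executed price should equal the notional-weighted average of its child trades' executed prices.

import pandas as pd

# Synthetic child trades for a single order
trades = pd.DataFrame({
    'executed_price': [1.1000, 1.1004, 1.1002],
    'executed_notional': [1e6, 2e6, 1e6]})

# Notional-weighted average executed price across the trades
executed_avg_trade = (trades['executed_price'] * trades['executed_notional']).sum() \
    / trades['executed_notional'].sum()

print(executed_avg_trade)  # 1.10025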
Example #4
def test_create_tca_report(fill_market_trade_databases):
    """Tests the creation of a TCAResults, checking they are fichecking it generates the right document
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        metric_calcs=MetricSlippage(),
        results_form=TimelineResultsForm(metric_name='slippage',
                                         by_date='datehour'),
        use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_results = TCAResults(tca_engine.calculate_tca(tca_request=tca_request),
                             tca_request)
    tca_results.render_computation_charts()

    assert tca_results.timeline is not None and tca_results.timeline_charts is not None

    tca_report = TCAReport(tca_results)
    html = tca_report.create_report()

    # Quick check to see that the HTML has been generated, by checking the existence of the HTML head tag
    assert '<head>' in html
Example #5
def test_market_data_convention(fill_market_trade_databases):
    """Tests that market data for unusual quotations is consistent (ie. if the user requests USDEUR, this should be
    inverted EURUSD (which is the correct convention)
    """
    Mediator.get_volatile_cache().clear_cache()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)
    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             data_store=market_data_store, market_data_database_table=market_data_database_table)

    #### Compare EURUSD to USDEUR
    market_correct_conv_series = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'USDEUR'
    market_reverse_conv_series = pd.DataFrame(1.0 / market_loader.get_market_data(market_request)['mid'])

    assert_frame_equal(market_correct_conv_series, market_reverse_conv_series, check_dtype=False)

    ### Compare EURJPY (which is autogenerated, if EURJPY is not collected directly) vs. EURUSD & USDJPY multiplied

    # Use resampled series for comparison
    market_request.ticker = 'USDJPY'
    market_df_USDJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'EURJPY'
    market_df_EURJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid']).resample('1min').mean()

    market_df_EURJPY_comp = (market_correct_conv_series.resample('1min').mean() * market_df_USDJPY.resample('1min').mean())

    market_df_EURJPY, market_df_EURJPY_comp = market_df_EURJPY.align(market_df_EURJPY_comp, join='inner')

    comp = (market_df_EURJPY - market_df_EURJPY_comp).dropna()

    assert all(comp < eps)
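
A standalone sketch (synthetic data, plain pandas, not tcapy's MarketRequest machinery) of the two convention checks above: USDEUR is the reciprocal of EURUSD, and a synthetic EURJPY mid can be derived by multiplying the EURUSD and USDJPY legs.

import pandas as pd

idx = pd.date_range('2017-01-01', periods=3, freq='1min')
eurusd = pd.Series([1.05, 1.06, 1.07], index=idx)
usdjpy = pd.Series([115.0, 115.5, 116.0], index=idx)

usdeur = 1.0 / eurusd     # Reverse quotation is just the reciprocal
eurjpy = eurusd * usdjpy  # Synthetic cross via the USD legs

assert (eurusd - 1.0 / usdeur).abs().max() < 1e-12
print(eurjpy)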
Example #6
def test_data_offset():
    """Tests the offsetting of market and trade data by milliseconds by user. This might be useful if clocks are slightly
    offset when recording market or trade data
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    # Now offset both the trade and market data
    tca_request.trade_data_offset_ms = 1
    tca_request.market_data_offset_ms = -1

    dict_of_df_offset = tca_engine.calculate_tca(tca_request=tca_request)

    trade_df = dict_of_df[trade_df_name]; market_df = dict_of_df['market_df']
    trade_df_offset = dict_of_df_offset[trade_df_name]; market_df_offset = dict_of_df_offset['market_df']

    assert all(market_df.index + timedelta(milliseconds=-1) == market_df_offset.index)
    assert all(trade_df.index + timedelta(milliseconds=1) == trade_df_offset.index)

    for c in constants.date_columns:
        if c in trade_df.columns:
            assert all(trade_df[c] + timedelta(milliseconds=1) == trade_df_offset[c])
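
A small illustrative sketch (plain pandas, not tcapy internals) of what the trade_data_offset_ms/market_data_offset_ms parameters do: shift a DatetimeIndex by a fixed number of milliseconds.

import pandas as pd
from datetime import timedelta

df = pd.DataFrame({'mid': [1.1, 1.2]},
                  index=pd.to_datetime(['2017-01-01 10:00:00.000',
                                        '2017-01-01 10:00:00.500']))

df_offset = df.copy()
df_offset.index = df_offset.index + timedelta(milliseconds=1)  # Shift forward by 1ms

assert all(df.index + timedelta(milliseconds=1) == df_offset.index)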
Example #7
def test_stress_tca(fill_market_trade_databases):
    """Makes several large TCARequests at the same time to stress test tcapy application and also to check it works
    with parallel requests (note: you may need to reduce the length of the dataset if your machine has limited amounts of RAM).

    It can be possible that when deployed on the web, several users might make simultaneous requests. Note, do not use
    pylibmc, and instead use python-memcached, when using memcached as a result backend. pylibmc is not thread-safe so
    will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    if not (stress_test):
        return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=valid_ticker_list,
        dummy_market=True,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        use_multithreading=True,
        tca_type='aggregated')

    # Kick off several simultaneous large TCA requests
    request_no = 2

    tca_request_list = []

    for i in range(0, request_no):
        tca_request_list.append(TCARequest(tca_request=tca_request))

    tca_engine = TCAEngineImpl(version=tcapy_version)

    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    result = []

    for item in tca_request_list:
        result.append(pool.apply_async(tca_engine.calculate_tca,
                                       args=(item, )))

    output = [p.get() for p in result]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict.keys()
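
A sketch of the same fan-out pattern using only the standard library (not tcapy's Swim wrapper): submit several jobs to a thread pool with apply_async and collect the results with get. The fake_calculate_tca function is a stand-in for tca_engine.calculate_tca.

from multiprocessing.pool import ThreadPool

def fake_calculate_tca(request_no):
    # Stand-in for tca_engine.calculate_tca
    return {'trade_df': 'result for request %d' % request_no}

pool = ThreadPool(2)

async_results = [pool.apply_async(fake_calculate_tca, args=(i,)) for i in range(2)]
output = [p.get() for p in async_results]

pool.close(); pool.join()

assert len(output) == 2 and all('trade_df' in o for o in output)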
Example #8
def test_invalid_dates_missing_data_tca(fill_market_trade_databases):
    """Tests if the trade/order and market data is identical for use_multithreading versus singlethreading for detailed,
    aggregated and compliance. Note that we need a running Celery server for use_multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    ## Test invalid dates
    tca_request.start_date = invalid_start_date;
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in use_multithreading:
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = True

            try:
                dict_of_df_invalid = tca_engine.calculate_tca(tca_request=tca_request)

                exception_triggered = False

            except DataMissingException:
                assert exception_triggered

    ## Test a single valid ticker, but missing data (only one ticker)
    tca_request.start_date = start_date;
    tca_request.finish_date = finish_date;
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in use_multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = True

            try:
                dict_of_df_missing_ticker = tca_engine.calculate_tca(tca_request=tca_request)

                exception_triggered = False

            except DataMissingException:
                assert exception_triggered
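
As an aside, the exception_triggered flag pattern above only asserts inside the except block, so it passes silently if no exception is raised at all. A stricter alternative sketch (standard pytest, not tcapy's own code) uses pytest.raises, shown here with a toy function in place of calculate_tca.

import pytest

def divide(a, b):
    # Toy stand-in for a call that should fail, e.g. calculate_tca with invalid dates
    return a / b

def test_divide_by_zero_raises():
    # Fails the test if ZeroDivisionError is NOT raised
    with pytest.raises(ZeroDivisionError):
        divide(1, 0)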
Example #9
    def __init__(self, trade_order_list=None, metric_name=None, aggregate_by_field=None, aggregation_metric='mean',
                 tag_value_combinations={}):
        if not(isinstance(trade_order_list, list)) and trade_order_list is not None:
            trade_order_list = [trade_order_list]

        self._trade_order_list = trade_order_list
        self._metric_name = metric_name
        self._aggregate_by_field = aggregate_by_field
        self._aggregation_metric = aggregation_metric
        self._results_summary = ResultsSummary()

        self._tag_value_combinations = tag_value_combinations
        self._trade_order_filter_tag = TradeOrderFilterTag()
        self._util_func = Mediator.get_util_func()
        self._time_series_ops = Mediator.get_time_series_ops()
Example #10
    def _convert_tuple_to_market_trade(self, market_trade_order_tuple):
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        # Gather market and trade/order data (which might be stored in a list)
        if isinstance(market_trade_order_tuple, list):
            market_df_list = []
            trade_order_holder = DataFrameHolder()

            for market_df_single, trade_order_holder_single in market_trade_order_tuple:
                market_df_list.append(market_df_single)

                trade_order_holder.add_dataframe_holder(trade_order_holder_single)

            market_df_list = volatile_cache.get_dataframe_handle(market_df_list, burn_after_reading=True)

            # to ensure that any spurious/None elements are removed
            market_df_list = [x for x in market_df_list if isinstance(x, pd.DataFrame)]

            # Want to make sure the data is properly ordered too (not guaranteed we'll get it back in the right order)
            market_df = self._time_series_ops.concat_dataframe_list(market_df_list)

        else:
            market_df = volatile_cache.get_dataframe_handle(market_trade_order_tuple[0], burn_after_reading=True)
            trade_order_holder = market_trade_order_tuple[1]

        return market_df, trade_order_holder
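
A plain pandas sketch (illustrative only, not TimeSeriesOps.concat_dataframe_list) of the concatenation step above: drop spurious/None elements, then concatenate and sort by index, since chunks are not guaranteed to come back in order.

import pandas as pd

chunks = [pd.DataFrame({'mid': [1.2]}, index=pd.to_datetime(['2017-01-02'])),
          None,
          pd.DataFrame({'mid': [1.1]}, index=pd.to_datetime(['2017-01-01']))]

# Remove any None/invalid elements before concatenating
chunks = [x for x in chunks if isinstance(x, pd.DataFrame)]

market_df = pd.concat(chunks).sort_index()

print(market_df)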
Example #11
    def _get_correct_convention_market_data(self, market_request, start_date=None, finish_date=None):
        # Check that cross is in correct convention
        if self._fx_conv.correct_notation(market_request.ticker) != market_request.ticker:
            raise Exception('Method expecting only crosses in correct market convention')

        cache = True

        if isinstance(market_request.data_store, pd.DataFrame):
            cache = False

        if market_request.multithreading_params['cache_period_market_data'] and cache:
            volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

            start_date, finish_date, market_key, market_df = \
                volatile_cache.get_data_request_cache(market_request, market_request.data_store, 'market_df',
                                                      market_request.data_offset_ms)

            if market_df is None:
                market_df = super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date, market_request)

                volatile_cache.put_data_request_cache(market_request, market_key, market_df)

            return self._strip_start_finish_dataframe(market_df, start_date, finish_date, market_request)
        else:
            if start_date is None or finish_date is None:
                start_date = market_request.start_date
                finish_date = market_request.finish_date

            return super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date,
                                                                                     market_request)
Example #12
def example_market_data_non_usd_cross():
    """Example for loading market data which has more exotic crosses, which are unlikely to be collected. For these
    exotic crosses tcapy will calculate the cross rates via the USD legs, eg. NZDCAD would be calculated from
    NZDUSD and USDCAD data.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    tca_request = TCARequest(start_date=start_date,
                             finish_date=finish_date,
                             ticker='NZDUSD',
                             market_data_store=market_data_store)

    market_base_df = market_loader.get_market_data(tca_request)

    tca_request.ticker = 'USDCAD'
    market_terms_df = market_loader.get_market_data(tca_request)

    market_df = pd.DataFrame(market_base_df['mid'] *
                             market_terms_df['mid']).dropna()

    tca_request.ticker = 'NZDCAD'
    market_direct_df = market_loader.get_market_data(tca_request)

    market_df, market_direct_df = market_df.align(market_direct_df,
                                                  join='inner')

    # check time series are equal to each other
    assert (market_df['mid'] - market_direct_df['mid']).sum() == 0
Example #13
    def __init__(self,
                 market_data_postfix='dukascopy',
                 csv_market_data=None,
                 write_to_db=True,
                 sql_trade_database_type='ms_sql_server'):
        if csv_market_data is None:
            self._market_data_source = 'arctic-' + market_data_postfix
        else:
            self._market_data_source = csv_market_data

        self._tca_market = Mediator.get_tca_market_trade_loader()

        # Assumes MongoDB for tick data and MSSQL for trade/order data
        if write_to_db:
            self._database_source_market = DatabaseSourceArctic(
                postfix=market_data_postfix)  # market data source

            self._market_data_database_name = constants.arctic_market_data_database_name
            self._market_data_database_table = constants.arctic_market_data_database_table

            if sql_trade_database_type == 'ms_sql_server':
                self._database_source_trade = DatabaseSourceMSSQLServer(
                )  # trade data source
                self._trade_data_database_name = constants.ms_sql_server_trade_data_database_name
            elif sql_trade_database_type == 'mysql':
                self._database_source_trade = DatabaseSourceMySQL(
                )  # trade data source
                self._trade_data_database_name = constants.mysql_trade_data_database_name

        self.time_series_ops = TimeSeriesOps()
        self.rand_time_series = RandomiseTimeSeries()
Example #14
    def normalize_trade_data(self, df, dataset, data_request):

        if df is None: return None

        # For cancelled trades the trade price might be recorded as "zero" or a negative price, which is invalid; convert these to NaN
        if 'executed_price' in df.columns:
            # df['executed_price'][df['executed_price'] <= 0] = np.nan
            df.loc[df['executed_price'] <= 0, 'executed_price'] = np.nan

        # Rename fields if necessary
        if 'executed_notional_currency' in df.columns:
            df = df.rename(columns={'executed_notional_currency' : 'notional_currency'})

        # Convert buy/sell to -1/+1

        # TODO do regex/case insensitive version
        # vals_to_replace = {'buy': 1, 'sell' : -1, 'Buy' : 1, 'Sell' : -1, 'BUY' : 1, 'SELL' : -1}
        # df['side'] = df['side'].map(vals_to_replace)

        df['side'].replace('buy', 1, inplace=True)
        df['side'].replace('sell', -1, inplace=True)
        df['side'].replace('Buy', 1, inplace=True)
        df['side'].replace('Sell', -1, inplace=True)
        df['side'].replace('BUY', 1, inplace=True)
        df['side'].replace('SELL', -1, inplace=True)

        if 'event_type' in df.columns:
            df['event_type'].replace('execution', 'trade', inplace=True)

        # Also assume selected date columns are UTC (eg. benchmark start and finish dates for the orders)
        df = Mediator.get_time_series_ops().localize_cols_as_UTC(df, constants.date_columns, index=True).sort_index()

        df = self.offset_data_ms(df, data_request)

        return df
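
A sketch of the case-insensitive mapping suggested by the TODO above (illustrative, not tcapy's implementation): lower-case the side column once, then map via a single dict instead of one replace call per spelling.

import pandas as pd

df = pd.DataFrame({'side': ['Buy', 'SELL', 'buy']})

# Normalise case first, then convert buy/sell to +1/-1 in one pass
df['side'] = df['side'].str.lower().map({'buy': 1, 'sell': -1})

print(df['side'].tolist())  # [1, -1, 1]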
Example #15
    def get_market_data(self, market_request):
        """Gets market data for tickers. When we ask for non-standard FX crosses, only the mid-field is
        returned (calculated as a cross rate). We do not give bid/ask quotes for calculated non-standard tickers, as these
        can difficult to estimate.

        Parameters
        ----------
        market_request : MarketRequest
            The type of market data to get

        Returns
        -------
        DataFrame
        """

        tca_ticker_loader = Mediator.get_tca_ticker_loader(
            version=self._version)

        if isinstance(market_request.ticker, list):
            if len(market_request.ticker) > 1:
                market_request_list = self._split_tca_request_into_list(
                    market_request)

                market_df_dict = {}

                for market_request_single in market_request_list:
                    market_df_dict[market_request_single.ticker] = \
                        tca_ticker_loader(version=self._version).get_market_data(market_request_single)

        return tca_ticker_loader.get_market_data(market_request)
Example #16
    def get_trade_order_holder(self, tca_request):
        """Gets the trades/orders in the form of a TradeOrderHolder

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for the TCA computation

        Returns
        -------
        TradeOrderHolder
        """

        tca_ticker_loader = Mediator.get_tca_ticker_loader(
            version=self._version)

        if isinstance(tca_request.ticker, list):
            if len(tca_request.ticker) > 1:
                tca_request_list = self._split_tca_request_into_list(
                    tca_request)

                trade_order_holder = DataFrameHolder()

                for tca_request_single in tca_request_list:
                    trade_order_holder.add_dataframe_holder(
                        tca_ticker_loader(version=self._version).
                        get_trade_order_holder(tca_request_single))

        return tca_ticker_loader(
            version=self._version).get_trade_order_holder(tca_request)
Example #17
    def calculate_benchmark(self,
                            trade_order_df=None,
                            market_df=None,
                            trade_order_name=None,
                            market_resample_freq=None,
                            market_resample_unit=None,
                            market_offset_ms=None,
                            resample_how=None):

        # if not (self._check_calculate_benchmark(trade_order_name=trade_order_name)): return trade_order_df, market_df

        if market_resample_freq is None:
            market_resample_freq = self._market_resample_freq
        if market_resample_unit is None:
            market_resample_unit = self._market_resample_unit
        if market_offset_ms is None: market_offset_ms = self._market_offset_ms
        if resample_how is None: resample_how = self._resample_how

        if market_offset_ms is not None:
            market_df.index = market_df.index + timedelta(
                milliseconds=market_offset_ms)

        if market_resample_freq is not None and market_resample_unit is not None:
            market_df = Mediator.get_time_series_ops().resample_time_series(
                market_df,
                resample_amount=market_resample_freq,
                how=resample_how,
                unit=market_resample_unit)

        return trade_order_df, market_df
Example #18
    def keys(self):
        """Returns the names of all the market or trades/orders stored internally.

        Returns
        -------
        list of str
        """
        return Mediator.get_util_func().dict_key_list(self._df_dict.keys())
Example #19
    def calculate_benchmark(self,
                            trade_order_df=None,
                            market_df=None,
                            trade_order_name=None,
                            market_resample_freq=None,
                            market_resample_unit=None,
                            market_offset_ms=None,
                            resample_how=None,
                            price_field=None,
                            volume_field=None):

        # if not (self._check_calculate_benchmark(trade_order_name=trade_order_name)): return trade_order_df, market_df

        if market_resample_freq is None:
            market_resample_freq = self._market_resample_freq
        if market_resample_unit is None:
            market_resample_unit = self._market_resample_unit
        if market_offset_ms is None: market_offset_ms = self._market_offset_ms
        if resample_how is None: resample_how = self._resample_how
        if price_field is None: price_field = self._price_field
        if volume_field is None: volume_field = self._volume_field

        if market_offset_ms is not None:
            market_df.index = market_df.index + timedelta(
                milliseconds=market_offset_ms)

        if market_resample_freq is not None and market_resample_unit is not None:
            if not (isinstance(resample_how, list)):
                resample_how = [resample_how]

            market_df_list = []

            for how in resample_how:
                market_df_list.append(
                    Mediator.get_time_series_ops().resample_time_series(
                        market_df,
                        resample_amount=market_resample_freq,
                        how=how,
                        unit=market_resample_unit,
                        price_field=price_field,
                        volume_field=volume_field))

            market_df = Mediator.get_time_series_ops().outer_join(
                market_df_list)

        return trade_order_df, market_df
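
A plain pandas sketch (not tcapy's TimeSeriesOps) of the resampling step above: downsample a tick series to a coarser frequency with several aggregations and join the results, similar to looping over resample_how and outer-joining.

import numpy as np
import pandas as pd

idx = pd.date_range('2017-01-01 10:00', periods=120, freq='1s')
market_df = pd.DataFrame({'mid': np.linspace(1.10, 1.11, 120)}, index=idx)

# One resampled frame per aggregation, then join them column-wise
resampled = [market_df.resample('1min').mean().add_suffix('_mean'),
             market_df.resample('1min').last().add_suffix('_last')]

market_df_1min = pd.concat(resampled, axis=1, join='outer')

print(market_df_1min)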
Example #20
    def __init__(self, trade_order_list=None):
        # self.logger = LoggerManager().getLogger(__name__)
        if not (isinstance(trade_order_list,
                           list)) and trade_order_list is not None:
            trade_order_list = [trade_order_list]

        self._trade_order_list = trade_order_list
        self._time_series_ops = Mediator.get_time_series_ops()
Example #21
    def get_market_data(self, market_request, return_cache_handles=False):
        # Cache handles act as pointers to DataFrames stored in the volatile cache

        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        cache = True

        # Don't attempt to cache DataFrames
        if hasattr(market_request, 'market_data_store'):
            if (isinstance(market_request.market_data_store, pd.DataFrame)):
                cache = False
        elif isinstance(market_request.data_store, pd.DataFrame):
            cache = False

        # If we have allowed the caching of monthly/periodic market data
        if market_request.multithreading_params['cache_period_market_data'] and cache:
            old_start_date = market_request.start_date;
            old_finish_date = market_request.finish_date

            # so we can also take TCARequest objects
            if hasattr(market_request, 'market_data_store'):
                data_store = market_request.market_data_store
                data_offset_ms = market_request.market_data_offset_ms
            else:
                data_store = market_request.data_store
                data_offset_ms = market_request.data_offset_ms

            # See if we can fetch from the cache (typically Redis)
            start_date, finish_date, market_key, market_df = \
                volatile_cache.get_data_request_cache(market_request, data_store, 'market_df',
                                                     data_offset_ms)

            # If data is already cached, just return the existing CacheHandle (which is like a pointer to the reference
            # in Redis)
            if market_df is not None and start_date == old_start_date and finish_date == old_finish_date and return_cache_handles:
                return CacheHandle(market_key, add_time_expiry=False)

            if market_df is None:

                market_request_copy = MarketRequest(market_request=market_request)
                market_request_copy.start_date = start_date
                market_request_copy.finish_date = finish_date

                market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request_copy)

                volatile_cache.put_data_request_cache(market_request_copy, market_key, market_df)

            market_df = self._strip_start_finish_dataframe(market_df, old_start_date, old_finish_date, market_request)
        else:
            market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request)

        # Return as a cache handle (which can be easily passed across Celery for example)
        # Only if use_multithreading
        if return_cache_handles and market_request.use_multithreading:
            return volatile_cache.put_dataframe_handle(market_df,
                use_cache_handles=market_request.multithreading_params['cache_period_market_data'])

        return market_df
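
A toy sketch of the cache handle idea (not tcapy's VolatileCache/CacheHandle implementation): store the DataFrame in a shared cache under a key and pass the lightweight key around instead of the DataFrame itself.

import pandas as pd

_cache = {}

def put_dataframe_handle(df, key):
    _cache[key] = df
    return key  # The "handle" is just the key

def get_dataframe_handle(key, burn_after_reading=False):
    df = _cache[key]
    if burn_after_reading:
        del _cache[key]  # Behave like a one-shot pointer
    return df

handle = put_dataframe_handle(pd.DataFrame({'mid': [1.1]}), 'EURUSD_market_df')
market_df = get_dataframe_handle(handle, burn_after_reading=True)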
Example #22
    def __init__(self, version=constants.tcapy_version):
        self._util_func = UtilFunc()

        self._tca_market_trade_loader = Mediator.get_tca_market_trade_loader(version=version)
        self._time_series_ops = TimeSeriesOps()
        self._trade_order_tag = TradeOrderFilterTag()

        logger = LoggerManager.getLogger(__name__)
        logger.info("Init TCAEngine version: " + self._tca_market_trade_loader.get_tca_version() + " - Env: " + constants.env)
Example #23
def test_results_form_average(fill_market_trade_databases):
    """Tests averages are calculated correctly by ResultsForm, compared to a direct calculation
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    trade_df, _ = MetricSlippage().calculate_metric(trade_order_df=trade_df,
                                                    market_df=market_df,
                                                    bid_benchmark='mid',
                                                    ask_benchmark='mid')

    results_form = BarResultsForm(
        market_trade_order_list=['trade_df'],
        metric_name='slippage',
        aggregation_metric='mean',
        aggregate_by_field=['ticker', 'venue'],
        scalar=10000.0,
        weighting_field='executed_notional_in_reporting_currency')

    results_df = results_form.aggregate_results(
        market_trade_order_df=trade_df,
        market_df=market_df,
        market_trade_order_name='trade_df')

    slippage_average = float(results_df[0][0].values[0])

    # Directly calculate slippage
    def grab_slippage(trade_df):
        return 10000.0 * ((trade_df['slippage'] * trade_df['executed_notional_in_reporting_currency']).sum() \
                   / trade_df['executed_notional_in_reporting_currency'].sum())

    slippage_average_comp = grab_slippage(trade_df)

    # Check the average slippage
    assert slippage_average - slippage_average_comp < eps

    slippage_average_venue = results_df[1][0]['venue'][venue_filter]

    slippage_average_venue_comp = grab_slippage(
        trade_df[trade_df['venue'] == venue_filter])

    # Check the average slippage by venue
    assert slippage_average_venue - slippage_average_venue_comp < eps
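
A plain pandas sketch (synthetic numbers, not BarResultsForm) of the notional-weighted average the test checks, both overall and grouped by venue.

import pandas as pd

trade_df = pd.DataFrame({
    'slippage': [-0.0001, -0.0003, -0.0002],
    'executed_notional_in_reporting_currency': [1e6, 2e6, 1e6],
    'venue': ['venue1', 'venue1', 'venue2']})

def weighted_slippage_bp(df):
    # Notional-weighted average slippage, scaled to basis points
    w = df['executed_notional_in_reporting_currency']
    return 10000.0 * (df['slippage'] * w).sum() / w.sum()

overall = weighted_slippage_bp(trade_df)
by_venue = trade_df.groupby('venue').apply(weighted_slippage_bp)

print(overall, by_venue['venue1'])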
Example #24
def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing full table and then appending
    for a in arctic_lib_type:
        for i in replace_append:

            database_source = DatabaseSourceArctic(postfix='testharness',
                                                   arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(
                market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=
                database_source,  # test_harness_arctic_market_data_store,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            diff_df.to_csv('test' + i + '.csv')
            assert all(diff_df < eps)
Example #25
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the database first before running this for
    the desired dates.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store(
    )

    for market_data_store, market_data_database_table in zip(
            market_data_store_list, market_data_database_table_list):
        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store=market_data_store,
            market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(
                market_df)
        except:
            pass

        assert not(market_df.empty) \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache(
            ).get_dataframe_handle(market_empty_df)
        except:
            pass

        assert market_empty_df.empty
Example #26
def test_write_market_data_db():
    """Tests we can write market data to KDB/Influxdb/PyStore
    """

    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = _get_db_market_database_source(
    )

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for i in range(0, len(database_source_list)):

        database_source = database_source_list[i]
        test_harness_market_data_table = test_harness_market_data_table_list[i]
        test_harness_data_store = test_harness_data_store_list[i]

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker)

        # Check first when replacing full table and then appending (will still replace ticker though)
        for i in replace_append:

            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            assert all(diff_df < eps)
Example #27
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and also supports CSVs
    which are sorted in reverse (i.e. descending). We need to enable chunking and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with that read from CSV directly
    """

    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'; arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # load data from CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(market_data_database_csv=csv_reverse_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(csv_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='replace',
                                             if_exists_ticker='replace', market_trade_data='market',
                                             csv_read_chunksize=100000, remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # compare reading directly from the CSV vs. reading back from arctic
        assert all(market_df_csv_asc['mid'] - market_df_load['mid'] < eps)

        ### write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(csv_reverse_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='append',
                                             if_exists_ticker='replace', market_trade_data='market',
                                             csv_read_chunksize=100000, read_in_reverse=True, remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # compare reading directly from the CSV vs. reading back from arctic
        assert all(market_df_csv_desc['mid'] - market_df_load['mid'] < eps)
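
A self-contained pandas sketch (synthetic CSV text, not convert_csv_to_table itself) of reading a CSV in chunks and restoring ascending order when the file was written in reverse, which is what the csv_read_chunksize and read_in_reverse flags control above.

import io
import pandas as pd

# Synthetic CSV written in reverse (descending) time order
csv_text = io.StringIO(
    "Date,mid\n"
    "2016-01-01 10:02:00,1.1002\n"
    "2016-01-01 10:01:00,1.1001\n"
    "2016-01-01 10:00:00,1.1000\n")

# Read in chunks (chunksize is tiny here purely for illustration)
chunks = [chunk for chunk in pd.read_csv(csv_text, index_col='Date', parse_dates=True,
                                         chunksize=2)]

market_df = pd.concat(chunks)

# If the CSV was sorted descending, restore ascending order before writing to the database
if not market_df.index.is_monotonic_increasing:
    market_df = market_df.sort_index()

print(market_df)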
Example #28
    def calculate_metrics_single_ticker(self, market_trade_order_combo,
                                        tca_request, dummy_market):
        volatile_cache = Mediator.get_volatile_cache(version=self._version)

        market_df, trade_order_df_values, ticker, trade_order_df_keys \
            = super(TCATickerLoaderImpl, self).calculate_metrics_single_ticker(market_trade_order_combo, tca_request, dummy_market)

        # Return as a cache handle (which can be easily passed across Celery for example) or not for the market
        # and trade/order data
        return volatile_cache.put_dataframe_handle(market_df, tca_request.multithreading_params['return_cache_handles_market_data']), \
                volatile_cache.put_dataframe_handle(trade_order_df_values, tca_request.multithreading_params['return_cache_handles_trade_data']), \
                ticker, trade_order_df_keys
Example #29
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database)
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'; arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:

        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0];
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException as e:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all(market_df_all_read_back['mid'] - market_df_csv['mid'] < eps)
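
A plain pandas/numpy sketch (not DatabaseSourceArctic) of the append behaviour exercised above: split a DataFrame into two non-overlapping halves and refuse to append a chunk whose index overlaps with data already stored.

import numpy as np
import pandas as pd

idx = pd.date_range('2017-01-01', periods=10, freq='1min')
market_df = pd.DataFrame({'mid': np.arange(10.0)}, index=idx)

market_df_lower, market_df_higher = np.array_split(market_df, 2)

def append_no_overlap(stored, new):
    # Mimics the overlap check: appending data that starts before the end of the store is rejected
    if new.index.min() <= stored.index.max():
        raise ValueError('Overlapping data - refusing to append')
    return pd.concat([stored, new])

stored_df = market_df_lower.copy()
stored_df = append_no_overlap(stored_df, market_df_higher)   # Fine: strictly later data
# append_no_overlap(stored_df, market_df_lower)              # Would raise, as in the test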
Example #30
    def normalize_market_data(self, df, dataset, data_request):
        df = Mediator.get_time_series_ops().localize_as_UTC(df)

        # For each dataset have a different field mapping (get field mapping for that dataset from stored CSV files)

        # Convert vendor specific field names to the Cuemacro names

        # Convert vendor specific asset names (eg. GBP=) to Cuemacro standard names (GBPUSD)

        # The dataset is very dense; we assume it is stored on disk in order (Arctic only allows ordered data)
        # df = df.sort_index()

        return self.offset_data_ms(df, data_request)
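
A plain pandas sketch (not localize_as_UTC itself) of the UTC localization step applied above before the millisecond offset: naive timestamps are localized as UTC, already-localized ones are converted.

import pandas as pd

df = pd.DataFrame({'mid': [1.1, 1.2]},
                  index=pd.to_datetime(['2017-01-01 10:00', '2017-01-01 10:01']))

if df.index.tz is None:
    df.index = df.index.tz_localize('UTC')  # Assume timestamps were recorded in UTC
else:
    df.index = df.index.tz_convert('UTC')

print(df.index)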