Example #1
    def get_trade_order_holder(self, tca_request):
        logger = LoggerManager.getLogger(__name__)

        logger.debug(
            "Get trade order holder for " + str(tca_request.ticker) + " from " + str(tca_request.start_date)
            + " - " + str(tca_request.finish_date))

        # Get all the trades/orders which have been requested, e.g. trade_df and order_df;
        # make separate calls, given they are assumed to be stored in different database tables
        trade_order_holder = DataFrameHolder()

        if tca_request.trade_order_mapping is not None:
            for trade_order_type in tca_request.trade_order_mapping:
                trade_order_df = self.get_trade_order_data(tca_request, trade_order_type)

                trade_order_holder.add_dataframe(trade_order_df, trade_order_type)

        return trade_order_holder
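
A minimal usage sketch of the method above (the loader instance and tca_request here are hypothetical; get_combined_dataframe_dict is used the same way in Examples #2 and #3):

# Hypothetical call site: 'loader' is assumed to expose get_trade_order_holder as above
trade_order_holder = loader.get_trade_order_holder(tca_request)

# Expand the holder into a dict of DataFrames keyed by trade/order type,
# e.g. {'trade_df': ..., 'order_df': ...}
trade_order_df_dict = trade_order_holder.get_combined_dataframe_dict()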
Example #2
def test_data_frame_holder():
    """Tests the storing of DataFrameHolder object which is like an enhanced dict specifically for storing DataFrames,
    alongside using the VolatileCache
    """
    import numpy as np
    import pandas as pd

    from pandas.testing import assert_frame_equal

    from tcapy.analysis.dataframeholder import DataFrameHolder
    from tcapy.data.volatilecache import VolatileRedis as VolatileCache

    # TimeSeriesOps is assumed to live in tcapy.util.timeseries
    from tcapy.util.timeseries import TimeSeriesOps

    volatile_cache = VolatileCache()

    # Create a very large DataFrame, which needs to be chunked in storage
    dt = pd.date_range(start='01 Jan 2000', end='05 Jan 2020', freq='10s')
    df = pd.DataFrame(index=dt, columns=['bid', 'mid', 'ask'])

    df['bid'] = np.ones(len(dt))
    df['mid'] = np.ones(len(dt))
    df['ask'] = np.ones(len(dt))

    df_list = TimeSeriesOps().split_array_chunks(df, chunks=2)
    df_lower = df_list[0]
    df_higher = df_list[1]

    # Store under keys both with and without the '_comp' suffix
    for i in ['_comp', '']:
        df_holder = DataFrameHolder()

        df_holder.add_dataframe(
            volatile_cache.put_dataframe_handle(df_lower,
                                                use_cache_handles=True),
            'EURUSD_df' + i)
        df_holder.add_dataframe(
            volatile_cache.put_dataframe_handle(df_higher,
                                                use_cache_handles=True),
            'EURUSD_df' + i)

        df_dict = df_holder.get_combined_dataframe_dict()

        df_final = df_dict['EURUSD_df' + i]

        # The chunks stored under each key should come back combined into the original
        assert_frame_equal(df, df_final)
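
Since DataFrameHolder acts like an enhanced dict that accumulates DataFrames under each key and combines them on retrieval, it can also be used without the cache, passing plain DataFrames to add_dataframe as in Example #1. A minimal sketch:

import pandas as pd

from tcapy.analysis.dataframeholder import DataFrameHolder

holder = DataFrameHolder()
holder.add_dataframe(pd.DataFrame({'mid': [1.0]}), 'EURUSD_df')
holder.add_dataframe(pd.DataFrame({'mid': [2.0]}), 'EURUSD_df')

# Both DataFrames stored under 'EURUSD_df' come back combined into one
df_dict = holder.get_combined_dataframe_dict()
print(df_dict['EURUSD_df'])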
Example #3
    def _parallel_get_market_trade_metrics(self, tca_request_list,
                                           dummy_market):
        logger = LoggerManager.getLogger(__name__)

        market_holder_list = DataFrameHolder()
        trade_order_holder_list = DataFrameHolder()

        # For each currency pair, collect the trades and market data, then calculate benchmarks and slippage
        result = []

        keep_looping = True

        # If we have also asked for trades/orders, check whether only point-in-time
        # executions (i.e. 'trade_df') have been requested
        if tca_request_list[0].trade_order_mapping is not None:
            point_in_time_executions_only = \
                self._util_func.dict_key_list(tca_request_list[0].trade_order_mapping) == ['trade_df']
        else:
            point_in_time_executions_only = True

        parallel_library = tca_request_list[0].multithreading_params[
            'parallel_library']

        if parallel_library == 'single':
            # from tcapy.analysis.tcatickerloaderimpl import TCATickerLoaderImpl
            tca_ticker_loader = Mediator.get_tca_ticker_loader(
                version=self._version)

        start_date = tca_request_list[0].start_date
        finish_date = tca_request_list[0].finish_date

        # Parameters for the loop
        i = 0
        no_of_tries = 5

        # Error trapping for Celery: if an event has failed, retry it
        while i < no_of_tries and keep_looping:

            try:
                # For each TCA request kick off a thread
                for tca_request_single_ticker in tca_request_list:

                    # Split up the request by date (monthly/weekly chunks)
                    tca_request_date_split = self._split_tca_request_by_date(
                        tca_request_single_ticker,
                        tca_request_single_ticker.ticker,
                        period=tca_request_single_ticker.
                        multithreading_params['cache_period'])

                    if not(constants.multithreading_params['splice_request_by_dates']) \
                                or tca_request_list[0].tca_type == 'detailed' \
                                or tca_request_list[0].tca_type == 'compliance' \
                                or tca_request_list[0].summary_display == 'candlestick'\
                                or not(point_in_time_executions_only):

                        if 'celery' in parallel_library:
                            # Load all the data for this ticker and THEN calculate the metrics on it
                            result.append(
                                chord(
                                    (get_market_trade_holder_via_celery.s(
                                        tca_request_data) for tca_request_data
                                     in tca_request_date_split),
                                    calculate_metrics_single_ticker_via_celery.
                                    s(tca_request_single_ticker,
                                      dummy_market)).apply_async())
                        elif parallel_library == 'single':
                            # This is not actually parallel, but is mainly for debugging purposes
                            for tca_request_s in tca_request_date_split:

                                market_df, trade_order_df_dict = tca_ticker_loader.get_market_trade_order_holder(
                                    tca_request_s, return_cache_handles=False)

                                market_df, trade_order_df_list, ticker, trade_order_keys = \
                                    tca_ticker_loader.calculate_metrics_single_ticker((market_df, trade_order_df_dict),
                                                                                        tca_request_s, dummy_market)

                                market_holder_list.add_dataframe(
                                    market_df, ticker)

                                trade_order_holder_list.add_dataframe_dict(
                                    dict(
                                        zip(trade_order_keys,
                                            trade_order_df_list)))

                    else:
                        # Otherwise work on parallel chunks by date: this doesn't currently
                        # work with orders which straddle day/week/month boundaries,
                        # but should work with point-in-time executions
                        #
                        # In practice, it's not much faster than the above code
                        if parallel_library == 'celery':

                            # For each ticker/date combination, load data and process the chunk (so it can run fully in parallel)
                            result.append(
                                group(
                                    get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery
                                    .s(tca_request_data, dummy_market)
                                    for tca_request_data in
                                    tca_request_date_split).apply_async())

                # Now combine the results from the parallel operations, if using celery
                if 'celery' in parallel_library:

                    # Careful when the output is empty!
                    output = [
                        p.get(timeout=constants.celery_timeout_seconds)
                        for p in result if p is not None
                    ]

                    # If pipelined/spliced by dates, we will have a list of lists, so flatten it into one
                    output = self._util_func.flatten_list_of_lists(output)

                    for market_df, trade_order_df_list, ticker, trade_order_keys in output:
                        market_holder_list.add_dataframe(market_df, ticker)
                        # market_df_dict[ticker] = market_df

                        trade_order_holder_list.add_dataframe_dict(
                            dict(zip(trade_order_keys, trade_order_df_list)))

                    del result
                    del output

                keep_looping = False

            # Data-related errors are not transient, so propagate them immediately
            except (DateException, TradeMarketNonOverlapException,
                    DataMissingException,
                    ErrorWritingOverlapDataException) as e:
                raise e

            # Any other exception is likely related to Celery, e.g. lack of communication
            # with the Redis message broker or the Memcached results backend
            except Exception as e:
                if i == no_of_tries - 1:
                    err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(
                        e) + ", " + str(traceback.format_exc())

                    raise Exception(err_msg)

                i = i + 1

                logger.warning("Failed with " + parallel_library +
                               ", trying again (attempt " + str(i) + " of " +
                               str(no_of_tries) + "): " + str(e) + ", " +
                               str(traceback.format_exc()))

        logger.debug("Finished parallel computation")

        # Expand out the DataFrame holders into dictionaries of DataFrames
        market_df_dict = market_holder_list.get_combined_dataframe_dict()
        trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(
            start_date=start_date, finish_date=finish_date)

        # TODO add candlestick drawing here for cases when using split threading by date
        trade_order_results_df_dict = self._util_func.remove_keymatch_dict(
            trade_order_results_df_dict, 'market_df_downsampled')

        return market_df_dict, trade_order_results_df_dict
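
For reference, the Celery fan-out/fan-in used above follows the standard chord pattern: a header group of tasks runs in parallel, and the list of their results is passed to a callback. A minimal sketch with hypothetical task names (assumes a configured Celery app with a Redis broker and backend):

from celery import Celery, chord

app = Celery('tca_sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')

@app.task
def load_chunk(tca_request_data):
    # Hypothetical: load market/trade data for one date chunk
    return tca_request_data

@app.task
def calculate_metrics(loaded_chunks, tca_request_single_ticker, dummy_market):
    # The chord callback receives the list of header results as its first
    # argument, then the extra arguments bound in the signature
    return loaded_chunks

# Mirrors the dispatch in the code above (requires a running broker):
# result = chord(
#     (load_chunk.s(req) for req in tca_request_date_split),
#     calculate_metrics.s(tca_request_single_ticker, dummy_market)).apply_async()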
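
The surrounding retry loop is also worth noting: data-related exceptions are re-raised immediately, while anything else (typically transient communication failures with the Redis broker or Memcached backend) is retried up to no_of_tries times. A compact, self-contained sketch of that pattern (fetch_results and the exception class are hypothetical stand-ins):

import logging
import traceback

logger = logging.getLogger(__name__)

class DataMissingException(Exception):
    """Hypothetical stand-in for tcapy's data exceptions"""

def fetch_results(attempt):
    # Hypothetical stand-in for the Celery dispatch/collection above;
    # fails transiently on the first two attempts
    if attempt < 2:
        raise ConnectionError("broker unavailable")
    return "output"

no_of_tries = 5

for i in range(no_of_tries):
    try:
        output = fetch_results(i)
        break
    except DataMissingException:
        # Data problems are not transient: fail fast rather than retry
        raise
    except Exception as e:
        if i == no_of_tries - 1:
            raise Exception("Failed after " + str(no_of_tries) + " attempts: " + str(e))
        logger.warning("Attempt %d failed, retrying: %s", i + 1, traceback.format_exc())

print(output)  # prints 'output' after two retried failures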