Esempio n. 1
0
    def get_trade_order_holder(self, tca_request):
        """Gets the trades/orders in the form of a TradeOrderHolder

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for the TCA computation

        Returns
        -------
        TradeOrderHolder
        """

        tca_ticker_loader = Mediator.get_tca_ticker_loader(
            version=self._version)

        # For a multi-ticker request, split it into single-ticker requests and
        # aggregate each ticker's trades/orders into one DataFrameHolder
        if isinstance(tca_request.ticker, list):
            if len(tca_request.ticker) > 1:
                tca_request_list = self._split_tca_request_into_list(
                    tca_request)

                trade_order_holder = DataFrameHolder()

                for tca_request_single in tca_request_list:
                    trade_order_holder.add_dataframe_holder(
                        tca_ticker_loader(version=self._version).
                        get_trade_order_holder(tca_request_single))

                # Bug fix: previously the aggregated holder was discarded and
                # the multi-ticker request fell through to the single-ticker
                # loader below; return the combined holder instead
                return trade_order_holder

        return tca_ticker_loader(
            version=self._version).get_trade_order_holder(tca_request)
Esempio n. 2
0
    def _convert_tuple_to_market_trade(self, market_trade_order_tuple):
        """Unpacks market data and trade/order holders from a (market, trade/order)
        tuple — or a list of such tuples — fetching the underlying market
        DataFrames from the volatile cache."""
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        if not isinstance(market_trade_order_tuple, list):
            # Single tuple: fetch the market data handle, pass the holder straight through
            market_df = volatile_cache.get_dataframe_handle(market_trade_order_tuple[0], burn_after_reading=True)

            return market_df, market_trade_order_tuple[1]

        # A list of (market, trade/order) pairs: collect the market handles and
        # merge all the trade/order holders into one
        market_handles = []
        trade_order_holder = DataFrameHolder()

        for market_single, holder_single in market_trade_order_tuple:
            market_handles.append(market_single)
            trade_order_holder.add_dataframe_holder(holder_single)

        fetched_market = volatile_cache.get_dataframe_handle(market_handles, burn_after_reading=True)

        # Drop any spurious/None elements before concatenation
        fetched_market = [df for df in fetched_market if isinstance(df, pd.DataFrame)]

        # Concatenate, making sure the data ends up properly ordered (it is not
        # guaranteed to come back from the cache in the right order)
        market_df = self._time_series_ops.concat_dataframe_list(fetched_market)

        return market_df, trade_order_holder
    def _get_market_trade_metrics(self, tca_request_list, dummy_market):
        """Fetches market/trade data and their metrics, running in parallel when
        the first request enables multithreading, otherwise delegating to the
        serial superclass implementation."""
        # volatile_cache = Mediator.get_volatile_cache(version=self._version)

        # No requests at all: return empty results
        if tca_request_list is None or len(tca_request_list) == 0:
            return {}, DataFrameHolder()

        if not tca_request_list[0].use_multithreading:
            # Serial path, handled by the parent class
            return super(TCAMarketTradeLoaderImpl, self)._get_market_trade_metrics(
                tca_request_list, dummy_market)

        return self._parallel_get_market_trade_metrics(tca_request_list, dummy_market)
Esempio n. 4
0
    def get_trade_order_holder(self, tca_request):
        """Collects every requested trade/order type (eg. trade_df, order_df) into a
        DataFrameHolder, making a separate data call per type, as each is assumed to
        be stored in a different database table."""
        logger = LoggerManager.getLogger(__name__)

        logger.debug(
            "Get trade order holder for " + str(tca_request.ticker) + " from " + str(tca_request.start_date)
            + " - " + str(tca_request.finish_date))

        holder = DataFrameHolder()

        mapping = tca_request.trade_order_mapping

        if mapping is not None:
            for trade_order_type in mapping:
                holder.add_dataframe(
                    self.get_trade_order_data(tca_request, trade_order_type),
                    trade_order_type)

        return holder
Esempio n. 5
0
    def _get_market_trade_metrics(self, tca_request_list, dummy_market):
        """Computes market/trade metrics for the requests, in parallel if enabled on
        the first request, otherwise serially via the superclass."""
        # volatile_cache = Mediator.get_volatile_cache(version=self._version)

        # Guard clauses: nothing requested means empty output
        if tca_request_list is None:
            return {}, DataFrameHolder()

        if not tca_request_list:
            return {}, DataFrameHolder()

        # Only attempt to execute in parallel if the flag has been enabled;
        # otherwise run without any use_multithreading
        if not tca_request_list[0].use_multithreading:
            return super(TCAMarketTradeLoaderImpl, self)._get_market_trade_metrics(
                tca_request_list, dummy_market)

        return self._parallel_get_market_trade_metrics(tca_request_list, dummy_market)
Esempio n. 6
0
    def get_trade_order_holder(self, tca_request):
        """Gets the trades/orders for a request, combining the per-ticker holders
        when multiple tickers have been asked for.

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for the TCA computation

        Returns
        -------
        TradeOrderHolder
        """

        tca_ticker_loader = Mediator.get_tca_ticker_loader(
            version=self._version)

        if isinstance(tca_request.ticker, list):
            if len(tca_request.ticker) > 1:
                tca_request_list = self._split_tca_request_into_list(
                    tca_request)

                trade_order_holder = DataFrameHolder()

                for tca_request_single in tca_request_list:
                    trade_order_holder.add_dataframe_holder(
                        tca_ticker_loader(version=self._version).
                        get_trade_order_holder(tca_request_single))

                # Bug fix: the combined holder used to be thrown away, with the
                # full multi-ticker request then passed to the single-ticker
                # loader below; return the aggregated holder instead
                return trade_order_holder

        return tca_ticker_loader(
            version=self._version).get_trade_order_holder(tca_request)
Esempio n. 7
0
    def _get_market_trade_metrics(self, tca_request_list, dummy_market):
        """Gets the market and trade data, as well as computed metrics on them

        Parameters
        ----------
        tca_request_list : TCARequest (list)
            Requests for multiple TCARequests (eg. for different tickers)

        dummy_market : bool
            Return dummy market data?

        Returns
        -------
        DataFrame (dict), DataFrame (dict)
        """

        tca_ticker_loader = Mediator.get_tca_ticker_loader(version=self._version)

        market_df_dict = {}
        trade_order_holder_list = DataFrameHolder()

        # Process each request (typically one per ticker) in turn
        for request_single in tca_request_list:
            market_df, trade_order_df_dict = \
                tca_ticker_loader.get_market_trade_order_holder(request_single)

            market_df, trade_order_df_list, ticker, trade_order_keys = \
                tca_ticker_loader.calculate_metrics_single_ticker(
                    (market_df, trade_order_df_dict), request_single, dummy_market)

            market_df_dict[ticker] = market_df

            trade_order_holder_list.add_dataframe_dict(
                {key: df for key, df in zip(trade_order_keys, trade_order_df_list)})

        # Unpack the DataFrameHolder into a dictionary, combining the lists of
        # trades, orders etc. into single DataFrames (this may also decompress
        # the trades)
        return market_df_dict, trade_order_holder_list.get_combined_dataframe_dict()
Esempio n. 8
0
def test_data_frame_holder():
    """Tests the storing of DataFrameHolder object which is like an enhanced dict specifically for storing DataFrames,
    alongside using the VolatileCache
    """
    from tcapy.analysis.dataframeholder import DataFrameHolder
    from tcapy.data.volatilecache import VolatileRedis as VolatileCache
    volatile_cache = VolatileCache()

    # Build a very large DataFrame, which will need to be chunked when stored
    dt = pd.date_range(start='01 Jan 2000', end='05 Jan 2020', freq='10s')
    df = pd.DataFrame(index=dt, columns=['bid', 'mid', 'ask'])

    for col in ['bid', 'mid', 'ask']:
        df[col] = np.ones(len(dt))

    # Split into two halves, which are stored under the same key
    df_lower, df_higher = TimeSeriesOps().split_array_chunks(df, chunks=2)

    # Exercise both the compressed ('_comp') and plain key variants
    for suffix in ['_comp', '']:
        df_holder = DataFrameHolder()

        key = 'EURUSD_df' + suffix

        for chunk in [df_lower, df_higher]:
            df_holder.add_dataframe(
                volatile_cache.put_dataframe_handle(chunk, use_cache_handles=True),
                key)

        df_final = df_holder.get_combined_dataframe_dict()[key]

    assert_frame_equal(df, df_final)
Esempio n. 9
0
    def _parallel_get_market_trade_metrics(self, tca_request_list,
                                           dummy_market):
        """Loads market and trade/order data for each TCARequest and computes metrics
        on them, distributing the work via Celery (or running serially when
        'single' is selected, mainly for debugging).

        Parameters
        ----------
        tca_request_list : TCARequest (list)
            Requests, typically one per ticker

        dummy_market : bool
            Return dummy market data?

        Returns
        -------
        DataFrame (dict), DataFrame (dict)
        """
        logger = LoggerManager.getLogger(__name__)

        market_holder_list = DataFrameHolder()
        trade_order_holder_list = DataFrameHolder()

        keep_looping = True

        # If we have also asked for trades/orders, check whether ONLY point-in-time
        # executions (trade_df) were requested, as only those can safely be spliced
        # into date chunks (orders may straddle chunk boundaries)
        if tca_request_list[0].trade_order_mapping is not None:
            point_in_time_executions_only = \
                self._util_func.dict_key_list(tca_request_list[0].trade_order_mapping) == ['trade_df']
        else:
            point_in_time_executions_only = True

        parallel_library = tca_request_list[0].multithreading_params[
            'parallel_library']

        if parallel_library == 'single':
            # Not actually parallel; mainly for debugging purposes
            tca_ticker_loader = Mediator.get_tca_ticker_loader(
                version=self._version)

        start_date = tca_request_list[0].start_date
        finish_date = tca_request_list[0].finish_date

        # Parameters for the retry loop: error trapping for Celery, eg. lack of
        # communication with the Redis message broker or results backend
        i = 0
        no_of_tries = 5

        while i < no_of_tries and keep_looping:

            # Reset the collected async results on every attempt, so a failed
            # attempt's stale handles are not fetched again on retry
            result = []

            try:
                # For each TCA request kick off a thread
                for tca_request_single_ticker in tca_request_list:

                    # Split up the request by date (monthly/weekly chunks)
                    tca_request_date_split = self._split_tca_request_by_date(
                        tca_request_single_ticker,
                        tca_request_single_ticker.ticker,
                        period=tca_request_single_ticker.
                        multithreading_params['cache_period'])

                    if not(constants.multithreading_params['splice_request_by_dates']) \
                                or tca_request_list[0].tca_type == 'detailed' \
                                or tca_request_list[0].tca_type == 'compliance' \
                                or tca_request_list[0].summary_display == 'candlestick'\
                                or not(point_in_time_executions_only):

                        if 'celery' in parallel_library:
                            # Load all the data for this ticker and THEN calculate the metrics on it
                            result.append(
                                chord(
                                    (get_market_trade_holder_via_celery.s(
                                        tca_request_data) for tca_request_data
                                     in tca_request_date_split),
                                    calculate_metrics_single_ticker_via_celery.
                                    s(tca_request_single_ticker,
                                      dummy_market)).apply_async())
                        elif parallel_library == 'single':
                            # This is not actually parallel, but is mainly for debugging purposes
                            for tca_request_s in tca_request_date_split:

                                market_df, trade_order_df_dict = tca_ticker_loader.get_market_trade_order_holder(
                                    tca_request_s, return_cache_handles=False)

                                market_df, trade_order_df_list, ticker, trade_order_keys = \
                                    tca_ticker_loader.calculate_metrics_single_ticker((market_df, trade_order_df_dict),
                                                                                        tca_request_s, dummy_market)

                                market_holder_list.add_dataframe(
                                    market_df, ticker)

                                trade_order_holder_list.add_dataframe_dict(
                                    dict(
                                        zip(trade_order_keys,
                                            trade_order_df_list)))

                    else:
                        # Otherwise work on parallel chunks by date
                        # doesn't currently work with orders which straddle day/week/month boundaries
                        # but should work with points in time
                        #
                        # In practice, it's not really much faster than the above code
                        if 'celery' == parallel_library:

                            # For each ticker/date combination load data and process chunk (so can do fully in parallel)
                            result.append(
                                group(
                                    get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery
                                    .s(tca_request_data, dummy_market)
                                    for tca_request_data in
                                    tca_request_date_split).apply_async())

                # Now combine the results from the parallel operations, if using celery
                if 'celery' in parallel_library:

                    # Careful, when the output is empty!
                    output = [
                        p.get(timeout=constants.celery_timeout_seconds)
                        for p in result if p is not None
                    ]

                    # If pipelined/splice_request_by_dates will have two lists so flatten it into one
                    output = self._util_func.flatten_list_of_lists(output)

                    for market_df, trade_order_df_list, ticker, trade_order_keys in output:
                        market_holder_list.add_dataframe(market_df, ticker)

                        trade_order_holder_list.add_dataframe_dict(
                            dict(zip(trade_order_keys, trade_order_df_list)))

                    del result
                    del output

                keep_looping = False

            # Data-related errors are not fixed by retrying, so propagate them
            # immediately (bare raise preserves the original traceback); the
            # previous per-exception clauses each had unreachable code after the
            # raise, which has been removed
            except (DateException, TradeMarketNonOverlapException,
                    DataMissingException, ErrorWritingOverlapDataException):
                raise

            # Any other exception is likely related to Celery, possibly a lack of
            # communication with the Redis message broker or Memcached results
            # backend, so retry a limited number of times
            except Exception as e:
                if i == no_of_tries - 1:
                    err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(
                        e) + ", " + str(traceback.format_exc())

                    raise Exception(err_msg)

                i = i + 1

                logger.warning("Failed with " + parallel_library +
                               ", trying again for " + str(i) + " time: " +
                               str(e) + ", " + str(traceback.format_exc()))

        logger.debug("Finished parallel computation")

        # Expand out the DataFrame holders into dictionaries of DataFrames
        market_df_dict = market_holder_list.get_combined_dataframe_dict()
        trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(
            start_date=start_date, finish_date=finish_date)

        # TODO add candlestick drawing here for cases when using split threading by date
        trade_order_results_df_dict = self._util_func.remove_keymatch_dict(
            trade_order_results_df_dict, 'market_df_downsampled')

        return market_df_dict, trade_order_results_df_dict