Example No. 1
    def _split_tca_request_by_date(self,
                                   tca_request,
                                   tick,
                                   split_dates=True,
                                   period='month'):

        tca_request_list = []

        dates = []

        # Break up dates into day/week/month chunks - the cache works on these chunks (the period can be
        # specified in constants), and day chunks typically seem optimal
        # Take care to floor dates to midnight for caching purposes
        if split_dates:
            if period == 'month':
                split_dates_freq = 'MS'
            elif period == 'week':
                split_dates_freq = 'W-MON'
            elif period == 'day':
                split_dates_freq = 'D'

            start_date_floored = self._util_func.floor_tick_of_date(
                tca_request.start_date)
            finish_date_floored = self._util_func.floor_tick_of_date(
                tca_request.finish_date, add_day=True)

            dates = pd.date_range(start=start_date_floored,
                                  end=finish_date_floored,
                                  freq=split_dates_freq).tolist()

        # Add start date and finish date if necessary
        # if len(dates) > 0:
        #     if start_date_floored < dates[0]:
        #         dates.insert(0, start_date_floored)
        #
        #     if finish_date_floored > dates[-1]:
        #         dates.append(finish_date_floored)
        # else:
        #     dates = [start_date_floored, finish_date_floored]

        logger = LoggerManager().getLogger(__name__)

        # If our start/finish dates span more than one period,
        # e.g. Jan 8th - Mar 7th with monthly chunks, split into
        # Jan 8th - Jan 31st 23:59:59.999, Feb 1st 00:00:00.000 - Feb 28th 23:59:59.999 etc.
        if len(dates) > 0:

            # For the very first chunk in our series
            if tca_request.start_date < dates[0]:
                tca_request_temp = TCARequest(tca_request=tca_request)
                tca_request_temp.ticker = tick
                tca_request_temp.start_date = tca_request.start_date
                tca_request_temp.finish_date = dates[0] - timedelta(
                    microseconds=1)

                tca_request_list.append(tca_request_temp)

            # For the full periods in between
            for i in range(0, len(dates) - 1):
                tca_request_temp = TCARequest(tca_request=tca_request)
                tca_request_temp.ticker = tick
                tca_request_temp.start_date = dates[i]
                tca_request_temp.finish_date = dates[i + 1] - timedelta(
                    microseconds=1)

                tca_request_list.append(tca_request_temp)

            # For the very last chunk of our series
            if dates[-1] < tca_request.finish_date:
                tca_request_temp = TCARequest(tca_request=tca_request)
                tca_request_temp.ticker = tick
                tca_request_temp.start_date = dates[-1]
                tca_request_temp.finish_date = tca_request.finish_date

                tca_request_list.append(tca_request_temp)
        else:
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick

            tca_request_list.append(tca_request_temp)

        date_str = ''

        for t in tca_request_list:
            date_str = date_str + ' / ' + str(t.start_date) + ' to ' + str(
                t.finish_date)

        logger.debug("Split TCA request for " + str(tca_request.ticker) +
                     " dates " + date_str + " from original request " +
                     str(tca_request.start_date) + ' to ' +
                     str(tca_request.finish_date))

        return tca_request_list
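
The splitting logic above can be hard to follow inside the full class, so here is a minimal, standalone sketch of the same idea, assuming only pandas; split_date_range and its arguments are illustrative names rather than tcapy APIs: floor the boundary dates, generate period starts with pd.date_range, then build chunks which end one microsecond before the next boundary.

from datetime import timedelta

import pandas as pd

def split_date_range(start, finish, freq='MS'):
    """Split [start, finish] into chunks aligned to period starts (freq='MS' = month starts)."""
    start = pd.Timestamp(start)
    finish = pd.Timestamp(finish)

    # Period boundaries which fall inside the (floored) requested window
    boundaries = pd.date_range(start=start.floor('D'),
                               end=finish.floor('D') + timedelta(days=1),
                               freq=freq).tolist()

    chunks = []

    if boundaries:
        # Partial chunk before the first boundary
        if start < boundaries[0]:
            chunks.append((start, boundaries[0] - timedelta(microseconds=1)))

        # Full periods in between
        for i in range(len(boundaries) - 1):
            chunks.append((boundaries[i], boundaries[i + 1] - timedelta(microseconds=1)))

        # Partial chunk after the last boundary
        if boundaries[-1] < finish:
            chunks.append((boundaries[-1], finish))
    else:
        # The request fits entirely inside a single period
        chunks.append((start, finish))

    return chunks

# e.g. split_date_range('2021-01-08', '2021-03-07') gives three chunks:
# Jan 8th - Jan 31st 23:59:59.999999, Feb 1st - Feb 28th 23:59:59.999999, Mar 1st - Mar 7th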
Example No. 2
    def get_trade_order_data(self, tca_request, trade_order_type,
                             start_date=None, finish_date=None,
                             return_cache_handles=True):
        # return_cache_handles returns a handle (pointer) to the data in the
        # cache, rather than the DataFrame itself

        logger = LoggerManager().getLogger(__name__)
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        # By default, assume we want trade data (rather than order data)
        if trade_order_type is None:
            trade_order_type = 'trade_df'

        trade_order_contents = tca_request.trade_order_mapping[trade_order_type]

        cache = True

        # Don't attempt to cache DataFrames (or CSVs of trades)
        if isinstance(trade_order_contents, pd.DataFrame):
            cache = False
        elif isinstance(trade_order_contents, str):
            if 'csv' in trade_order_contents:
                cache = False

        # If we have allowed the caching of monthly/weekly trade data
        if tca_request.multithreading_params['cache_period_trade_data'] and cache:
            old_start_date = tca_request.start_date
            old_finish_date = tca_request.finish_date

            # See if we can fetch from the cache (usually Redis)
            start_date, finish_date, trade_key, trade_df = \
                volatile_cache.get_data_request_cache(
                    tca_request, tca_request.trade_data_store, trade_order_type, tca_request.trade_data_offset_ms)

            # If data is already cached, just return the existing CacheHandle
            if trade_df is not None and start_date == old_start_date and finish_date == old_finish_date:
                return CacheHandle(trade_key, add_time_expiry=False)

            # If it wasn't in the cache, fetch it and then push it into the cache
            if trade_df is None:
                logger.debug('Key not found for ' + trade_key + ".. now need to load")

                # Call the superclass (get back a DataFrame rather than a cache handle)
                trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(tca_request, trade_order_type,
                                                                                 start_date=start_date,
                                                                                 finish_date=finish_date)

                # Cache this periodic monthly/weekly data
                volatile_cache.put_data_request_cache(tca_request, trade_key, trade_df)

            # Strip the output back to the requested start/finish dates (when we load from the cache, we get back full months/weeks)
            trade_df = self._strip_start_finish_dataframe(trade_df, start_date, finish_date, tca_request)
        else:
            if start_date is None or finish_date is None:
                start_date = tca_request.start_date
                finish_date = tca_request.finish_date

            # Call the superclass (get back a DataFrame rather than a cache handle)
            trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(tca_request, trade_order_type,
                                                                             start_date=start_date,
                                                                             finish_date=finish_date)

        if return_cache_handles and tca_request.use_multithreading:
            # Return as a cache handle (which can be easily passed across Celery for example)
            return volatile_cache.put_dataframe_handle(trade_df,
                                                       use_cache_handles=tca_request.multithreading_params['cache_period_trade_data'])

        return trade_df
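
Example No. 2 follows a check-cache / load-on-miss / put-back / strip pattern. Below is a minimal, standalone sketch of that pattern with a plain dict standing in for the volatile cache (e.g. Redis); make_key, load_trades and get_trades_cached are illustrative names, not tcapy APIs, and for simplicity the sketch assumes the request falls within a single month (the splitting in Example No. 1 handles the general case).

import pandas as pd

_cache = {}  # stand-in for the volatile cache (e.g. Redis)

def make_key(ticker, start_date, finish_date):
    return ticker + '_' + str(start_date) + '_' + str(finish_date)

def load_trades(ticker, start_date, finish_date):
    # Placeholder loader - in practice this would hit the trade database
    index = pd.date_range(start_date, finish_date, freq='D')
    return pd.DataFrame({'ticker': ticker, 'notional': 1e6}, index=index)

def get_trades_cached(ticker, requested_start, requested_finish):
    # Cache on the enclosing month, so repeated requests inside that month reuse one entry
    period_start = pd.Timestamp(requested_start).to_period('M').to_timestamp()
    period_finish = period_start + pd.offsets.MonthEnd(1)

    key = make_key(ticker, period_start, period_finish)
    trade_df = _cache.get(key)

    if trade_df is None:
        # Cache miss: load the whole month and store it for later requests
        trade_df = load_trades(ticker, period_start, period_finish)
        _cache[key] = trade_df

    # Strip back down to the dates actually requested
    return trade_df[requested_start:requested_finish]

# e.g. get_trades_cached('EURUSD', '2021-02-05', '2021-02-10') loads and caches all of
# Feb 2021 on the first call, then serves later February requests from the cache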