def _split_tca_request_by_date(self, tca_request, tick, split_dates=True, period='month'):
    """Splits a TCARequest into day/week/month sized chunks, so each chunk lines up
    with the period boundaries used by the cache.
    """
    logger = LoggerManager().getLogger(__name__)

    tca_request_list = []
    dates = []

    # Break up dates into day/week/month chunks - our cache works on day/week/month chunks
    # (can specify in constants); typically day chunks seem optimal
    # Take care to floor dates to midnight for caching purposes
    if split_dates:
        if period == 'month':
            split_dates_freq = 'MS'
        elif period == 'week':
            split_dates_freq = 'W-MON'
        elif period == 'day':
            split_dates_freq = 'D'

        start_date_floored = self._util_func.floor_tick_of_date(tca_request.start_date)
        finish_date_floored = self._util_func.floor_tick_of_date(tca_request.finish_date, add_day=True)

        dates = pd.date_range(start=start_date_floored, end=finish_date_floored,
                              freq=split_dates_freq).tolist()

        # Add start date and finish date if necessary
        # if len(dates) > 0:
        #     if start_date_floored < dates[0]:
        #         dates.insert(0, start_date_floored)
        #
        #     if finish_date_floored > dates[-1]:
        #         dates.append(finish_date_floored)
        # else:
        #     dates = [start_date_floored, finish_date_floored]

    # If our start/finish dates span more than one period,
    # eg. Jan 8th - Mar 7th with monthly chunks, split into
    # Jan 8th - Jan 31st 23:59:59.999999, Feb 1st 00:00:00 - Feb 28th 23:59:59.999999 etc.
    if len(dates) > 0:
        # For the very first chunk of our series
        if tca_request.start_date < dates[0]:
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick
            tca_request_temp.start_date = tca_request.start_date
            tca_request_temp.finish_date = dates[0] - timedelta(microseconds=1)

            tca_request_list.append(tca_request_temp)

        # For the full periods in between
        for i in range(0, len(dates) - 1):
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick
            tca_request_temp.start_date = dates[i]
            tca_request_temp.finish_date = dates[i + 1] - timedelta(microseconds=1)

            tca_request_list.append(tca_request_temp)

        # For the very last chunk of our series
        if dates[-1] < tca_request.finish_date:
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick
            tca_request_temp.start_date = dates[-1]
            tca_request_temp.finish_date = tca_request.finish_date

            tca_request_list.append(tca_request_temp)
    else:
        tca_request_temp = TCARequest(tca_request=tca_request)
        tca_request_temp.ticker = tick

        tca_request_list.append(tca_request_temp)

    date_str = ''

    for t in tca_request_list:
        date_str = date_str + ' / ' + str(t.start_date) + ' to ' + str(t.finish_date)

    logger.debug("Split TCA request for " + str(tca_request.ticker) + " dates " + date_str +
                 " from original request " + str(tca_request.start_date) + ' to ' +
                 str(tca_request.finish_date))

    return tca_request_list
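# A minimal usage sketch of the splitting above (hypothetical dates and loader
# instance; assumes TCARequest accepts these constructor parameters). A request
# from Jan 8th 2017 to Mar 7th 2017 with period='month' yields three chunks -
# Jan 8th - Jan 31st 23:59:59.999999, Feb 1st - Feb 28th 23:59:59.999999 and
# Mar 1st - Mar 7th - so each chunk lines up with a monthly cache key:
#
#   tca_request = TCARequest(start_date='08 Jan 2017', finish_date='07 Mar 2017',
#                            ticker='EURUSD')
#
#   for chunk in loader._split_tca_request_by_date(tca_request, 'EURUSD', period='month'):
#       print(chunk.start_date, chunk.finish_date)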
def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None,
                         return_cache_handles=True):
    """Gets trade or order data for a TCARequest, fetching from the volatile cache where
    possible; when return_cache_handles=True, returns a CacheHandle (a pointer to the
    DataFrame in the volatile cache) rather than the DataFrame itself.
    """
    logger = LoggerManager().getLogger(__name__)

    volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

    # By default, assume we want trade data (rather than order data)
    if trade_order_type is None:
        trade_order_type = 'trade_df'

    trade_order_contents = tca_request.trade_order_mapping[trade_order_type]

    cache = True

    # Don't attempt to cache trade data supplied directly as a DataFrame (or a CSV of trades)
    if isinstance(trade_order_contents, pd.DataFrame):
        cache = False
    elif isinstance(trade_order_contents, str):
        if 'csv' in trade_order_contents:
            cache = False

    # If we have allowed the caching of monthly/weekly trade data
    if tca_request.multithreading_params['cache_period_trade_data'] and cache:
        old_start_date = tca_request.start_date
        old_finish_date = tca_request.finish_date

        # See if we can fetch from the cache (usually Redis)
        start_date, finish_date, trade_key, trade_df = \
            volatile_cache.get_data_request_cache(
                tca_request, tca_request.trade_data_store, trade_order_type,
                tca_request.trade_data_offset_ms)

        # If the data is already cached, just return the existing CacheHandle
        if trade_df is not None and start_date == old_start_date and finish_date == old_finish_date:
            return CacheHandle(trade_key, add_time_expiry=False)

        # If it wasn't in the cache, fetch it and push it into the cache
        if trade_df is None:
            logger.debug("Key not found for " + trade_key + "... now need to load")

            # Call the superclass (get back DataFrames, not CacheHandles)
            trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(
                tca_request, trade_order_type, start_date=start_date, finish_date=finish_date)

            # Cache this periodic monthly/weekly data
            volatile_cache.put_data_request_cache(tca_request, trade_key, trade_df)

        # Strip off the start/finish dates (because when we load from the cache, we get full months)
        trade_df = self._strip_start_finish_dataframe(trade_df, start_date, finish_date, tca_request)
    else:
        if start_date is None or finish_date is None:
            start_date = tca_request.start_date
            finish_date = tca_request.finish_date

        # Call the superclass (get back DataFrames, not CacheHandles)
        trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(
            tca_request, trade_order_type, start_date=start_date, finish_date=finish_date)

    if return_cache_handles and tca_request.use_multithreading:
        # Return as a CacheHandle (which can easily be passed across Celery workers, for example)
        return volatile_cache.put_dataframe_handle(
            trade_df,
            use_cache_handles=tca_request.multithreading_params['cache_period_trade_data'])

    return trade_df
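# A minimal usage sketch for the method above (hypothetical request and loader
# instance; assumes multithreading is enabled and that the volatile cache exposes
# a get_dataframe_handle counterpart to put_dataframe_handle for resolving a
# handle back into a DataFrame):
#
#   handle_or_df = loader.get_trade_order_data(tca_request, 'trade_df')
#
#   if isinstance(handle_or_df, CacheHandle):
#       # Lightweight pointer, cheap to pass between Celery workers
#       trade_df = volatile_cache.get_dataframe_handle(handle_or_df)
#   else:
#       trade_df = handle_or_df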