class IntradayBarRequest(Request):

    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """Intraday bar request for bbg

        Parameters
        ----------
        symbols : string
        interval : number of minutes
        start : start date
        end : end date (if None then use today)
        event : (TRADE,BID,ASK,BEST_BID,BEST_ASK)
        """
        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        # default window: last 30 days up to "now"
        if start is None:
            start = datetime.today() - timedelta(30)

        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related: accumulates columns of bar data until the final event
        self.response = defaultdict(list)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        start, end = self.start, self.end

        request = svc.CreateRequest('IntradayBarRequest')
        request.Set('security', self.symbol)
        request.Set('interval', self.interval)
        request.Set('eventType', self.event)
        request.Set('startDateTime',
                    session.CreateDatetime(start.year, start.month, start.day,
                                           start.hour, start.minute))
        request.Set('endDateTime',
                    session.CreateDatetime(end.year, end.month, end.day,
                                           end.hour, end.minute))

        self.logger.info("Fetching intraday data for " + str(self.symbol)
                         + " from " + start.strftime('%d/%m/%Y')
                         + " to " + end.strftime('%d/%m/%Y'))

        return request

    def on_event(self, evt, is_final):
        """
        on_event - This is invoked from in response to COM PumpWaitingMessages - different thread
        """
        response = self.response

        self.logger.debug("Receiving data from Bloomberg...")

        for msg in XmlHelper.message_iter(evt):
            bars = msg.GetElement('barData').GetElement('barTickData')

            self.logger.debug("Read message...")

            for i in range(bars.NumValues):
                bar = bars.GetValue(i)

                # element 0 is the bar timestamp; truncate to minute precision
                ts = bar.GetElement(0).Value
                dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)

                response['time'].append(dt)
                response['open'].append(bar.GetElement(1).Value)
                response['high'].append(bar.GetElement(2).Value)
                response['low'].append(bar.GetElement(3).Value)
                response['close'].append(bar.GetElement(4).Value)
                response['volume'].append(bar.GetElement(5).Value)
                response['events'].append(bar.GetElement(6).Value)

                # periodic progress logging for very long responses
                if (i % 20000 == 0):
                    dt_str = dt.strftime('%d/%m/%Y')
                    self.logger.debug("Processing " + dt_str)

            self.logger.debug("Finished processing for ticker.")

        if is_final:
            # convert the accumulated columns into a float32 DataFrame indexed by bar time
            idx = response.pop('time')

            self.response = DataFrame(
                response,
                columns=['open', 'high', 'low', 'close', 'volume', 'events'],
                index=idx)
            self.response.index.name = 'Date'
            self.response = self.response.astype('float32')
class LoaderBBG(LoaderTemplate):
    """Abstract Bloomberg loader; concrete subclasses implement the COM / Open API downloads."""

    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events times/dates separately needs ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(
                    time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index('release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(
                        time_series_request, time_series_request_vendor)

                    # move the date index alongside the data so it can be joined column-wise
                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop=False)

                    data_frame = pandas.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request,
                                                 time_series_request_vendor)

        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            # assume one ticker only
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.download_intraday(time_series_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    self.logger.info("No tickers returned for: "
                                     + time_series_request_vendor.tickers)
                    return None

                cols = data_frame.columns.values
                # BUGFIX: tz_localize returns a new object; the original call discarded
                # the result, leaving the index timezone-naive
                data_frame = data_frame.tz_localize('UTC')
                cols = time_series_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        """Download daily data and rename vendor columns to 'ticker.field' form.

        Returns None when the vendor returns an empty frame.
        """
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(time_series_request_vendor.tickers))
                except Exception:
                    # best-effort logging only; tickers may not be stringifiable
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers,
                                                        time_series_request)

            ticker_combined = [ticker + "." + field
                               for ticker, field in zip(tickers, fields)]

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor, time_series_request):
        """Download reference (static/event) data, extending the window one year ahead."""
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for "
                          + time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers,
                                                        time_series_request)

            ticker_combined = [ticker + "." + field
                               for ticker, field in zip(tickers, fields)]

            data_frame.columns = ticker_combined
            # convert_objects is deprecated in later pandas versions; kept for
            # compatibility with the pandas release this project targets
            data_frame = data_frame.convert_objects(convert_dates='coerce',
                                                    convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
class ReferenceDataRequest(Request):

    def __init__(self, symbols, fields, overrides=None, response_type='frame',
                 ignore_security_error=0, ignore_field_error=0):
        """
        response_type: (frame, map) how to return the results
        """
        assert response_type in ('frame', 'map')

        Request.__init__(self, ignore_security_error=ignore_security_error,
                         ignore_field_error=ignore_field_error)

        # normalise a single string to a one-element list
        self.symbols = [symbols] if isinstance(symbols, str) else symbols
        self.fields = [fields] if isinstance(fields, str) else fields
        self.overrides = overrides

        # response related: map keyed by security, or columns accumulated for a frame
        self.response = {} if response_type == 'map' else defaultdict(list)
        self.response_type = response_type
        self.logger = LoggerManager().getLogger(__name__)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def quick_override(self, request, fieldId, val):
        """Append a single fieldId/value override to the request."""
        # NOTE(review): 'AppendElment' appears misspelt but may be the actual method
        # name exposed by the Bloomberg COM dispatch interface - confirm before renaming
        o = request.GetElement('overrides').AppendElment()
        o.SetElement('fieldId', fieldId)
        o.SetElement('value', val)

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        request = svc.CreateRequest('ReferenceDataRequest')

        # plain loops instead of side-effect list comprehensions
        for sec in self.symbols:
            request.GetElement('securities').AppendValue(sec)

        for fld in self.fields:
            request.GetElement('fields').AppendValue(fld)

        #self.quick_override(request, 'START_DT', '19990101')
        #self.quick_override(request, 'END_DT', '20200101')
        self.quick_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time

        Request.apply_overrides(request, self.overrides)

        return request

    def on_security_node(self, node):
        """Record field data (and any field errors) for one security node."""
        sid = XmlHelper.get_child_value(node, 'security')
        farr = node.GetElement('fieldData')
        fdata = XmlHelper.get_child_values(farr, self.fields)

        self.logger.debug("Returning reference data...")

        assert len(fdata) == len(self.fields), 'field length must match data length'

        if self.response_type == 'map':
            self.response[sid] = fdata
        else:
            self.response['security'].append(sid)

            for f, d in zip(self.fields, fdata):
                self.response[f].append(d)

        # add any field errors, if present
        ferrors = XmlHelper.get_field_errors(node)

        if ferrors:
            self.field_errors.extend(ferrors)

    def on_event(self, evt, is_final):
        """ this is invoked from in response to COM PumpWaitingMessages - different thread """
        for msg in XmlHelper.message_iter(evt):
            for node, error in XmlHelper.security_iter(msg.GetElement('securityData')):
                if error:
                    self.security_errors.append(error)
                else:
                    self.on_security_node(node)

        if is_final and self.response_type == 'frame':
            # convert the accumulated column lists into a DataFrame indexed by security
            index = self.response.pop('security')
            frame = DataFrame(self.response, columns=self.fields, index=index)
            frame.index.name = 'security'
            self.response = frame
class LoaderBBG(LoaderTemplate):
    """Abstract Bloomberg loader; concrete subclasses implement the COM / Open API downloads."""

    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events times/dates separately needs ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(
                    time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index('release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(
                        time_series_request, time_series_request_vendor)

                    # move the date index alongside the data so it can be joined column-wise
                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop=False)

                    data_frame = pandas.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request,
                                                 time_series_request_vendor)

        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            # assume one ticker only
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.download_intraday(time_series_request_vendor)

            # BUGFIX: guard against None/empty frames before renaming columns
            # (the sibling implementation of this class already does this)
            if data_frame is not None:
                if data_frame.empty:
                    self.logger.info("No tickers returned for: "
                                     + time_series_request_vendor.tickers)
                    return None

                cols = data_frame.columns.values
                # BUGFIX: tz_localize returns a new object; the original call discarded
                # the result, leaving the index timezone-naive
                data_frame = data_frame.tz_localize('UTC')
                cols = time_series_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        """Download daily data and rename vendor columns to 'ticker.field' form."""
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers,
                                                        time_series_request)

            ticker_combined = [ticker + "." + field
                               for ticker, field in zip(tickers, fields)]

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor, time_series_request):
        """Download reference (static/event) data, extending the window one year ahead."""
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for "
                          + time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers,
                                                        time_series_request)

            ticker_combined = [ticker + "." + field
                               for ticker, field in zip(tickers, fields)]

            data_frame.columns = ticker_combined
            # convert_objects is deprecated in later pandas versions; kept for
            # compatibility with the pandas release this project targets
            data_frame = data_frame.convert_objects(convert_dates='coerce',
                                                    convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
class LightTimeSeriesFactory:
    """Fetches time series from the configured market-data vendors, with in-memory caching."""

    # cache of downloaded frames keyed by cache file name
    # NOTE: class attribute - shared across all instances of object!
    _time_series_cache = {}

    def __init__(self):
        # self.config = ConfigManager()
        # NOTE(review): self.config is never assigned (see the commented line above),
        # yet harvest_time_series calls self.config.get_tickers_list_for_category when
        # no tickers are supplied - that path raises AttributeError. Confirm intent.
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """
        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """
        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes internal cache of time series
        """
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """
        loader = None

        if source == 'bloomberg':
            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if 'internet_load' in time_series_request.cache_algo:
            self.logger.debug("Internet loading.. ")

        # signal to loader template to exit session
        if loader is not None and kill_session:
            loader.kill_session()

        if time_series_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                # log the full traceback rather than failing the whole harvest
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # intraday entries are cached per ticker; daily entries per category
        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        if isinstance(ticker, list):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        data_frame_agg = None
        ticker_cycle = 0

        # handle intraday ticker calls separately one by one
        for ticker in time_series_request.tickers:
            time_series_request_single = copy.copy(time_series_request)
            time_series_request_single.tickers = ticker

            if hasattr(time_series_request, 'vendor_tickers'):
                time_series_request_single.vendor_tickers = \
                    [time_series_request.vendor_tickers[ticker_cycle]]
                ticker_cycle = ticker_cycle + 1

            # we downscale into float32, to avoid memory problems in Python (32 bit)
            # data is stored on disk as float32 anyway
            data_frame_single = loader.load_ticker(time_series_request_single)

            # if the vendor doesn't provide any data, don't attempt to append
            if data_frame_single is not None:
                if not data_frame_single.empty:
                    data_frame_single.index.name = 'Date'
                    data_frame_single = data_frame_single.astype('float32')

                    # if you call for returning multiple tickers, be careful with memory considerations!
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

            # key = self.create_category_key(time_series_request, ticker)
            # fname = self.create_cache_file_name(key)
            # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # daily data does not include ticker in the key, as multiple tickers in the same file
        data_frame_agg = loader.load_ticker(time_series_request)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'):
            cut = time_series_request.cut

        if (ticker is not None):
            key = category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
class LightTimeSeriesFactory:
    """Fetches time series from the configured vendors, optionally fanning out over a thread pool."""

    # cache of downloaded frames keyed by cache file name
    # NOTE: class attribute - shared across all instances of object!
    _time_series_cache = {}

    def __init__(self):
        # self.config = ConfigManager()
        # NOTE(review): self.config is never assigned (see the commented line above),
        # yet harvest_time_series calls self.config.get_tickers_list_for_category when
        # no tickers are supplied - that path raises AttributeError. Confirm intent.
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """
        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """
        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes internal cache of time series
        """
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """
        loader = None

        if source == 'bloomberg':
            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if 'internet_load' in time_series_request.cache_algo:
            self.logger.debug("Internet loading.. ")

        # signal to loader template to exit session
        # NOTE(review): session kill is deliberately disabled in this variant
        # (the kill_session parameter is currently unused) - confirm intent
        # if loader is not None and kill_session == True: loader.kill_session()

        if time_series_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                # log the full traceback rather than failing the whole harvest
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # intraday entries are cached per ticker; daily entries per category
        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        if isinstance(ticker, list):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        data_frame_agg = None
        ticker_cycle = 0

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(time_series_request.tickers) == 1 or Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

                # key = self.create_category_key(time_series_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg
        else:
            # multi-threaded version: one TimeSeriesRequest per ticker, fetched in a pool
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)

    def fetch_single_time_series(self, time_series_request):
        """Download one TimeSeriesRequest and normalise the result ('Date' index, float32)."""
        data_frame_single = self.get_loader(
            time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'
                data_frame_single = data_frame_single.astype('float32')

        return data_frame_single

    def fetch_group_time_series(self, time_series_request_list):
        """Fan out a list of TimeSeriesRequests over a pool and outer-join the results."""
        data_frame_agg = None

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        # BUGFIX: the original compared the string with 'is' (identity), which is not a
        # reliable equality test for strings - use '==' instead
        if Constants().time_series_factory_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spend waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        if time_series_request_list[0].data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[
                time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # daily data does not include ticker in the key, as multiple tickers in the same file
        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(len(time_series_request.tickers)
                             / Constants().time_series_factory_thread_no['other'] - 1)

            # guard against zero (and, for tiny ticker lists, negative) group sizes
            if group_size <= 0:
                group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = \
                    time_series_request.tickers[i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'):
            cut = time_series_request.cut

        if (ticker is not None):
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
class IntradayBarRequest(Request):

    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """Intraday bar request for the Bloomberg (COM) refdata service.

        Parameters
        ----------
        symbol : str
            single Bloomberg security identifier
        interval : int
            bar size in minutes
        start : datetime-like, optional
            start of the window (defaults to 30 days before now, UTC)
        end : datetime-like, optional
            end of the window (defaults to now, UTC)
        event : str
            one of TRADE, BID, ASK, BEST_BID, BEST_ASK
        """
        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        # BUGFIX: use utcnow() for the default start as well, so that both
        # defaults are on the same (UTC) clock; previously start used local
        # time via datetime.today() while end used datetime.utcnow(), which
        # could shift the requested window by the local timezone offset
        if start is None:
            start = datetime.utcnow() - timedelta(30)
        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related: column name -> list of values, filled by on_event;
        # replaced by a DataFrame when the final event arrives
        self.response = defaultdict(list)

    def get_bbg_service_name(self):
        """Bloomberg service providing intraday bar data."""
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):
        """Create and populate the Bloomberg IntradayBarRequest object."""
        start, end = self.start, self.end

        request = svc.CreateRequest('IntradayBarRequest')
        request.Set('security', self.symbol)
        request.Set('interval', self.interval)
        request.Set('eventType', self.event)
        request.Set('startDateTime',
                    session.CreateDatetime(start.year, start.month, start.day,
                                           start.hour, start.minute))
        request.Set('endDateTime',
                    session.CreateDatetime(end.year, end.month, end.day,
                                           end.hour, end.minute))

        self.logger.info("Fetching intraday data for " + str(self.symbol)
                         + " from " + start.strftime('%d/%m/%Y')
                         + " to " + end.strftime('%d/%m/%Y'))

        return request

    def on_event(self, evt, is_final):
        """Invoked (from a different thread) in response to COM
        PumpWaitingMessages; accumulates bar columns and, on the final event,
        converts them into a float32 DataFrame indexed by bar time.
        """
        response = self.response

        self.logger.debug("Receiving data from Bloomberg...")

        for msg in XmlHelper.message_iter(evt):
            bars = msg.GetElement('barData').GetElement('barTickData')

            self.logger.debug("Read message...")

            for i in range(bars.NumValues):
                bar = bars.GetValue(i)

                # element 0 is the bar timestamp; truncate to minute precision
                ts = bar.GetElement(0).Value
                dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)

                response['time'].append(dt)
                response['open'].append(bar.GetElement(1).Value)
                response['high'].append(bar.GetElement(2).Value)
                response['low'].append(bar.GetElement(3).Value)
                response['close'].append(bar.GetElement(4).Value)
                response['volume'].append(bar.GetElement(5).Value)
                response['events'].append(bar.GetElement(6).Value)

                # periodic progress logging for very large downloads
                if (i % 20000 == 0):
                    self.logger.debug("Processing " + dt.strftime('%d/%m/%Y'))

            self.logger.debug("Finished processing for ticker.")

        if is_final:
            # pivot the accumulated columns into a time-indexed DataFrame;
            # downcast to float32 to keep memory usage down
            idx = response.pop('time')
            self.response = DataFrame(
                response,
                columns=['open', 'high', 'low', 'close', 'volume', 'events'],
                index=idx)
            self.response.index.name = 'Date'
            self.response = self.response.astype('float32')
class ReferenceDataRequest(Request):

    def __init__(self, symbols, fields, overrides=None, response_type='frame',
                 ignore_security_error=0, ignore_field_error=0):
        """Reference data request for the Bloomberg (COM) refdata service.

        Parameters
        ----------
        symbols : str or list of str
            securities to query (a single string is promoted to a list)
        fields : str or list of str
            fields to query (a single string is promoted to a list)
        overrides : dict, optional
            field overrides applied via Request.apply_overrides
        response_type : str
            'frame' returns a DataFrame, 'map' returns {security: fields}
        """
        assert response_type in ('frame', 'map')
        Request.__init__(self, ignore_security_error=ignore_security_error,
                         ignore_field_error=ignore_field_error)

        # normalise single strings to one-element lists
        # (conditional expression replaces the legacy "and/or" idiom)
        self.symbols = [symbols] if isinstance(symbols, str) else symbols
        self.fields = [fields] if isinstance(fields, str) else fields
        self.overrides = overrides

        # response related
        self.response = {} if response_type == 'map' else defaultdict(list)
        self.response_type = response_type
        self.logger = LoggerManager().getLogger(__name__)

    def get_bbg_service_name(self):
        """Bloomberg service providing reference data."""
        return '//blp/refdata'

    def quick_override(self, request, fieldId, val):
        """Append a single fieldId/value override to the request."""
        # NOTE(review): 'AppendElment' looks misspelt but appears to be the
        # actual Bloomberg COM API method name - confirm before "fixing"
        o = request.GetElement('overrides').AppendElment()
        o.SetElement('fieldId', fieldId)
        o.SetElement('value', val)

    def get_bbg_request(self, svc, session):
        """Create and populate the Bloomberg ReferenceDataRequest object."""
        request = svc.CreateRequest('ReferenceDataRequest')

        # plain loops instead of side-effect list comprehensions
        for sec in self.symbols:
            request.GetElement('securities').AppendValue(sec)

        for fld in self.fields:
            request.GetElement('fields').AppendValue(fld)

        # self.quick_override(request, 'START_DT', '19990101')
        # self.quick_override(request, 'END_DT', '20200101')

        self.quick_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time

        Request.apply_overrides(request, self.overrides)
        return request

    def on_security_node(self, node):
        """Extract the field values for one security node and append them to
        the response, in either map or columnar form."""
        sid = XmlHelper.get_child_value(node, 'security')
        farr = node.GetElement('fieldData')
        fdata = XmlHelper.get_child_values(farr, self.fields)

        self.logger.debug("Returning reference data...")

        assert len(fdata) == len(self.fields), 'field length must match data length'

        if self.response_type == 'map':
            self.response[sid] = fdata
        else:
            self.response['security'].append(sid)
            for f, d in zip(self.fields, fdata):
                self.response[f].append(d)

        # add any field errors (explicit if instead of the
        # "ferrors and list.extend(...)" expression-statement trick)
        ferrors = XmlHelper.get_field_errors(node)
        if ferrors:
            self.field_errors.extend(ferrors)

    def on_event(self, evt, is_final):
        """Invoked (from a different thread) in response to COM
        PumpWaitingMessages; on the final event the columnar response is
        pivoted into a security-indexed DataFrame."""
        for msg in XmlHelper.message_iter(evt):
            for node, error in XmlHelper.security_iter(msg.GetElement('securityData')):
                if error:
                    self.security_errors.append(error)
                else:
                    self.on_security_node(node)

        if is_final and self.response_type == 'frame':
            index = self.response.pop('security')
            frame = DataFrame(self.response, columns=self.fields, index=index)
            frame.index.name = 'security'
            self.response = frame
class LightTimeSeriesFactory:
    # shared across all instances of the object!
    _time_series_cache = {}

    def __init__(self):
        # self.config = ConfigManager()
        # NOTE(review): self.config is read in harvest_time_series when no
        # tickers are supplied, but is never assigned here - confirm whether
        # ConfigManager should be instantiated
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

    def set_bloomberg_com_api(self):
        """Sets Bloomberg API to the COM library."""
        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """Sets Bloomberg API to OpenAPI (recommended)."""
        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """Flushes the shared in-memory cache of time series."""
        # BUGFIX: clear the shared class-level dict in place; rebinding
        # self._time_series_cache = {} created an instance attribute that
        # shadowed the shared cache for this instance only, breaking the
        # documented "shared across all instances" behaviour
        self._time_series_cache.clear()

    def set_intraday_code(self, code):
        # vendor-specific code used when requesting intraday data
        self._intraday_code = code

    def get_loader(self, source):
        """Loads the appropriate data service class for a vendor.

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo",
            "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """
        loader = None

        if source == 'bloomberg':
            # allow use of COM API (older) and Open API (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """Loads time series from the specified data provider.

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to be fetched,
            including ticker, start & finish date etc.
        kill_session : bool
            whether to ask the loader to terminate its session afterwards

        Returns
        -------
        pandas.DataFrame or None
        """
        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a
        # category); also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call
        # to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if ('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            # if loader is not None and kill_session == True: loader.kill_session()

        if (time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like
            # other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            # best-effort filtering: log the failure and return None rather
            # than propagate (BUGFIX: narrowed the bare except)
            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """Loads time series from the in-memory cache (if present).

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to be fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame or None
        """
        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """Creates a hash key (cache file name) for a TimeSeriesRequest.

        Returns
        -------
        str
        """
        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """Loads intraday time series from the specified data provider,
        one ticker at a time (single threaded) or via a thread pool.

        Returns
        -------
        pandas.DataFrame or None
        """
        data_frame_agg = None
        ticker_cycle = 0

        # single threaded version: handle intraday ticker calls separately,
        # one by one
        if len(time_series_request.tickers) == 1 or \
                Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in
                # Python (32 bit); data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if data_frame_single.empty == False:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be
                        # careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(
                                data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

            # key = self.create_category_key(time_series_request, ticker)
            # fname = self.create_cache_file_name(key)
            # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg

        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests, one per ticker
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)

    def fetch_single_time_series(self, time_series_request):
        """Downloads one ticker via its loader; downcasts to float32 where
        possible. Intended to be mapped over a thread/process pool."""
        data_frame_single = self.get_loader(
            time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if data_frame_single.empty == False:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which include dates; best-effort,
                # deliberately ignored
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    pass

                if time_series_request.freq == "second":
                    # NOTE(review): on modern pandas resample() returns a
                    # Resampler, not a DataFrame - confirm the intended
                    # aggregation (e.g. .mean()) against the pandas version used
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, time_series_request_list):
        """Downloads a list of requests in parallel and joins the results
        into a single DataFrame (outer join on the index)."""
        data_frame_agg = None

        # depends on the nature of operation as to whether we should use the
        # threading or multiprocessing library
        # BUGFIX: compare strings with == rather than identity ('is'), which
        # only worked by accident of CPython string interning
        if Constants().time_series_factory_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so
            # can use threads rather than multiprocessing; must use the
            # multiprocessing_on_dill library otherwise can't pickle objects
            # correctly; note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        # allow a per-data-source thread count override
        if time_series_request_list[0].data_source in \
                Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[
                time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return
        # the results
        result = pool.map_async(self.fetch_single_time_series,
                                time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with
                # memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(
                            data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """Loads daily time series from the specified data provider, splitting
        the tickers into groups when multiple threads are configured.

        Returns
        -------
        pandas.DataFrame
        """
        # daily data does not include ticker in the key, as multiple tickers
        # can live in the same file
        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(
                len(time_series_request.tickers) /
                Constants().time_series_factory_thread_no['other'] - 1)

            if group_size == 0:
                group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = \
                    time_series_request.tickers[i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """Returns a category key of the form
        environment.category.source.freq.cut[.ticker] for a TimeSeriesRequest.

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'):
            cut = time_series_request.cut

        key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        if (ticker is not None):
            key = key + '.' + ticker

        return key

    def create_cache_file_name(self, filename):
        """Prefixes the configured time series data folder onto a key."""
        return Constants().folder_time_series_data + "/" + filename
class LightTimeSeriesFactory:
    # shared across all instances of the object!
    _time_series_cache = {}

    def __init__(self):
        # self.config = ConfigManager()
        # NOTE(review): self.config is read in harvest_time_series when no
        # tickers are supplied, but is never assigned here - confirm whether
        # ConfigManager should be instantiated
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

    def set_bloomberg_com_api(self):
        """Sets Bloomberg API to the COM library."""
        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """Sets Bloomberg API to OpenAPI (recommended)."""
        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """Flushes the shared in-memory cache of time series."""
        # BUGFIX: clear the shared class-level dict in place; rebinding
        # self._time_series_cache = {} created an instance attribute that
        # shadowed the shared cache for this instance only, breaking the
        # documented "shared across all instances" behaviour
        self._time_series_cache.clear()

    def set_intraday_code(self, code):
        # vendor-specific code used when requesting intraday data
        self._intraday_code = code

    def get_loader(self, source):
        """Loads the appropriate data service class for a vendor.

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo",
            "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """
        loader = None

        if source == 'bloomberg':
            # allow use of COM API (older) and Open API (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """Loads time series from the specified data provider.

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to be fetched,
            including ticker, start & finish date etc.
        kill_session : bool
            whether to ask the loader to terminate its session afterwards

        Returns
        -------
        pandas.DataFrame or None
        """
        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a
        # category); also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call
        # to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if ('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            if loader is not None and kill_session == True:
                loader.kill_session()

        if (time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # best-effort filtering (BUGFIX: narrowed the bare except, and log
            # the traceback instead of silently swallowing every error)
            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """Loads time series from the in-memory cache (if present).

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to be fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame or None
        """
        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """Creates a hash key (cache file name) for a TimeSeriesRequest.

        Returns
        -------
        str
        """
        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """Loads intraday time series from the specified data provider,
        handling each ticker separately, one by one.

        Returns
        -------
        pandas.DataFrame or None
        """
        data_frame_agg = None
        ticker_cycle = 0

        # handle intraday ticker calls separately one by one
        for ticker in time_series_request.tickers:
            time_series_request_single = copy.copy(time_series_request)
            time_series_request_single.tickers = ticker

            if hasattr(time_series_request, 'vendor_tickers'):
                time_series_request_single.vendor_tickers = \
                    time_series_request.vendor_tickers[ticker_cycle]
                ticker_cycle = ticker_cycle + 1

            # we downscale into float32, to avoid memory problems in Python
            # (32 bit); data is stored on disk as float32 anyway
            data_frame_single = loader.load_ticker(time_series_request_single)

            # if the vendor doesn't provide any data, don't attempt to append
            if data_frame_single is not None:
                if data_frame_single.empty == False:
                    data_frame_single.index.name = 'Date'
                    data_frame_single = data_frame_single.astype('float32')

                    # if you call for returning multiple tickers, be careful
                    # with memory considerations!
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(
                            data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

        # key = self.create_category_key(time_series_request, ticker)
        # fname = self.create_cache_file_name(key)
        # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """Loads daily time series from the specified data provider and
        caches the result in memory.

        Returns
        -------
        pandas.DataFrame
        """
        # daily data does not include ticker in the key, as multiple tickers
        # can live in the same file
        data_frame_agg = loader.load_ticker(time_series_request)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """Returns a category key of the form
        category.source.freq.cut[.ticker] for a TimeSeriesRequest.

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'):
            cut = time_series_request.cut

        key = category + '.' + source + '.' + freq + '.' + cut

        if (ticker is not None):
            key = key + '.' + ticker

        return key

    def create_cache_file_name(self, filename):
        """Prefixes the configured time series data folder onto a key."""
        return Constants().folder_time_series_data + "/" + filename