def process_response_event(self, event):
    data_frame_list = []

    logger = LoggerManager().getLogger(__name__)

    for msg in event:
        # Generates a lot of output, so don't use unless for debugging
        # purposes
        # logger.info(msg)

        if msg.hasElement(self.RESPONSE_ERROR):
            logger.error("REQUEST FAILED: "
                         + str(msg.getElement(self.RESPONSE_ERROR)))
            continue

        data_frame_slice = self.process_message(msg)

        if data_frame_slice is not None:
            data_frame_list.append(data_frame_slice)

    if data_frame_list == []:
        logger.warning("No elements for ticker.")
        return None
    else:
        return pd.concat(data_frame_list)
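# For context, each slice returned by process_message (see BBGLowLevelDaily
# further below) is a small DataFrame indexed by date with (field, ticker)
# columns, and process_response_event simply stacks however many slices the
# response was split into. A minimal pandas-only sketch of that
# concatenation, with made-up values standing in for real Bloomberg messages:

import pandas as pd

# Two hypothetical slices for the same ticker, as might come back from a
# response that Bloomberg has split across several messages
slice_a = pd.DataFrame(
    {("PX_LAST", "EURUSD Curncy"): [1.10, 1.11]},
    index=pd.to_datetime(["2023-01-02", "2023-01-03"]))
slice_b = pd.DataFrame(
    {("PX_LAST", "EURUSD Curncy"): [1.12, 1.13]},
    index=pd.to_datetime(["2023-01-04", "2023-01-05"]))

# process_response_event collects the non-empty slices and concatenates
# them row-wise into a single time series
data_frame = pd.concat([slice_a, slice_b])
print(data_frame)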
def write_time_series_cache_to_disk(
        self, fname, data_frame, engine='hdf5_fixed', append_data=False,
        db_server=constants.db_server, db_port=constants.db_port,
        username=constants.db_username, password=constants.db_password,
        filter_out_matching=None, timeout=10,
        use_cache_compression=constants.use_cache_compression,
        parquet_compression=constants.parquet_compression,
        md_request=None, ticker=None):
    """Writes a Pandas DataFrame to disk as an HDF5, bcolz, Parquet or CSV
    file, or to an Arctic/MongoDB or Redis store.

    Parameters
    ----------
    fname : str
        Path of file (or database key)
    data_frame : DataFrame
        Data frame to be written to disk
    engine : str
        'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot
        append to this
        'hdf5_table' - use HDF5 table format, slower but can append to
        'parquet' - use Parquet
        'arctic' - use Arctic/MongoDB database
        'redis' - use Redis
        'bcolz' - use bcolz
        'csv' - use CSV
    append_data : bool
        False - write a fresh copy of data on disk each time
        True - append data to disk
    db_server : str
        Database server for Arctic/Redis (default: constants.db_server)
    timeout : int
        Number of seconds before a database connection times out
    """

    logger = LoggerManager().getLogger(__name__)

    if md_request is not None:
        fname = self.path_join(
            fname, md_request.create_category_key(ticker=ticker))

    # Default HDF5 format
    hdf5_format = 'fixed'

    if 'hdf5' in engine:
        hdf5_format = engine.split('_')[1]
        engine = 'hdf5'

    if engine == 'bcolz':
        # Convert invalid characters (which bcolz can't deal with) to
        # substitutes
        data_frame.columns = self.find_replace_chars(
            data_frame.columns, _invalid_chars, _replace_chars)
        data_frame.columns = ['A_' + x for x in data_frame.columns]

        data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

        bcolzpath = self.get_bcolz_filename(fname)
        shutil.rmtree(bcolzpath, ignore_errors=True)
        zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
    elif engine == 'redis':
        fname = os.path.basename(fname).replace('.', '_')

        # Will fail if Redis is not installed
        try:
            r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                  socket_timeout=timeout,
                                  socket_connect_timeout=timeout)

            ping = r.ping()

            # If Redis is alive, try pushing to it
            if ping:
                if data_frame is not None:
                    if isinstance(data_frame, pandas.DataFrame):
                        mem = data_frame.memory_usage(deep=True).sum()
                        mem_float = round(float(mem) / (1024.0 * 1024.0), 3)

                        if mem_float < 500:
                            # msgpack/blosc is deprecated
                            # r.set(fname, data_frame.to_msgpack(compress='blosc'))

                            # Now uses pyarrow
                            context = pa.default_serialization_context()

                            ser = context.serialize(data_frame).to_buffer()

                            if use_cache_compression:
                                comp = pa.compress(ser, codec='lz4',
                                                   asbytes=True)
                                siz = len(ser)

                                r.set('comp_' + str(siz) + '_' + fname, comp)
                            else:
                                r.set(fname, ser.to_pybytes())

                            logger.info("Pushed " + fname + " to Redis")
                        else:
                            logger.warning("Did not push " + fname
                                           + " to Redis, given its size")
                else:
                    logger.info("Object " + fname
                                + " is empty, not pushed to Redis.")
            else:
                logger.warning("Didn't push " + fname
                               + " to Redis, as Redis is not running")
        except Exception as e:
            logger.warning("Couldn't push " + fname + " to Redis: " + str(e))
    elif engine == 'arctic':
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000
        fname = os.path.basename(fname).replace('.', '_')

        logger.info('Load Arctic/MongoDB library: ' + fname)

        if username is not None and password is not None:
            c = pymongo.MongoClient(
                host="mongodb://" + username + ":" + password + "@"
                     + str(db_server) + ":" + str(db_port),
                connect=False)  # , username=username, password=password)
        else:
            c = pymongo.MongoClient(
                host="mongodb://" + str(db_server) + ":" + str(db_port),
                connect=False)

        store = Arctic(c,
                       socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS,
                       connectTimeoutMS=socketTimeoutMS)

        database = None

        try:
            database = store[fname]
        except:
            pass

        if database is None:
            store.initialize_library(fname, audit=False)
            logger.info("Created MongoDB library: " + fname)
        else:
            logger.info("Got MongoDB library: " + fname)

        # Access the library
        library = store[fname]

        if 'intraday' in fname:
            data_frame = data_frame.astype('float32')

        if filter_out_matching is not None:
            cols = data_frame.columns

            new_cols = []

            for col in cols:
                if filter_out_matching not in col:
                    new_cols.append(col)

            data_frame = data_frame[new_cols]

        # Problems with Arctic when writing timezones to disk sometimes,
        # so strip the timezone
        data_frame = data_frame.copy().tz_localize(None)

        try:
            # Can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            logger.info("Written MongoDB library: " + fname)
        except Exception as e:
            logger.warning("Couldn't write MongoDB library: " + fname
                           + " " + str(e))
    elif engine == 'hdf5':
        h5_filename = self.get_h5_filename(fname)

        # Appending data only works for HDF5 stored as tables (but this is
        # much slower than fixed format); removes duplicated entries at
        # the end
        if append_data:
            store = pandas.HDFStore(h5_filename, format=hdf5_format,
                                    complib="zlib", complevel=9)

            if 'intraday' in fname:
                data_frame = data_frame.astype('float32')

            # Get the last row which matches and remove everything after
            # that (because the append function doesn't check for
            # duplicated rows)
            nrows = len(store['data'].index)
            last_point = data_frame.index[-1]

            i = nrows - 1

            while i > 0:
                read_index = store.select('data', start=i,
                                          stop=nrows).index[0]

                if read_index <= last_point:
                    break

                i = i - 1

            # Remove rows at the end, which are duplicates of the incoming
            # time series
            store.remove(key='data', start=i, stop=nrows)
            store.put(key='data', value=data_frame, format=hdf5_format,
                      append=True)
            store.close()
        else:
            h5_filename_temp = self.get_h5_filename(fname + ".temp")

            # Delete any existing temp copy
            try:
                os.remove(h5_filename_temp)
            except:
                pass

            store = pandas.HDFStore(h5_filename_temp, complib="zlib",
                                    complevel=9)

            if 'intraday' in fname:
                data_frame = data_frame.astype('float32')

            store.put(key='data', value=data_frame, format=hdf5_format)
            store.close()

            # Delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

            # Once written to disk, rename the temp file over the old one
            os.rename(h5_filename_temp, h5_filename)

        logger.info("Written HDF5: " + fname)
    elif engine == 'parquet':
        if '.parquet' not in fname:
            if fname[-5:] != '.gzip':
                fname = fname + '.parquet'

        self.to_parquet(data_frame, fname,
                        aws_region=constants.aws_region,
                        parquet_compression=parquet_compression)
        # data_frame.to_parquet(fname, compression=parquet_compression)

        logger.info("Written Parquet: " + fname)
    elif engine == 'csv':
        if '.csv' not in fname:
            fname = fname + '.csv'

        data_frame.to_csv(fname)

        logger.info("Written CSV: " + fname)
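# As a usage illustration only: a minimal sketch assuming this method lives
# on findatapy's IOEngine-style class (the class definition and import path
# are not shown in this excerpt and are assumptions here), and, for the
# 'redis' engine, that a local Redis instance is reachable.

import pandas as pd

# Assumed import path for the owning class (not shown in this excerpt)
from findatapy.market.ioengine import IOEngine

io = IOEngine()

df = pd.DataFrame(
    {"close": [1.10, 1.11, 1.12]},
    index=pd.date_range("2023-01-02", periods=3, freq="D"))

# Fixed-format HDF5: quickest to write/read, but cannot be appended to
io.write_time_series_cache_to_disk("eurusd_daily", df, engine="hdf5_fixed")

# Parquet: a '.parquet' extension is added automatically if missing
io.write_time_series_cache_to_disk("eurusd_daily", df, engine="parquet")

# Redis cache: the frame is serialized with pyarrow and, when
# use_cache_compression is enabled, LZ4-compressed under a
# 'comp_<uncompressed_size>_<fname>' key
io.write_time_series_cache_to_disk("eurusd_daily", df, engine="redis",
                                   db_server="127.0.0.1", db_port=6379)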
def load_time_series(self, market_data_request):
    logger = LoggerManager().getLogger(__name__)

    # if BBGLowLevelTemplate._session is None:
    session = self.start_bloomberg_session()
    # else:
    #     session = BBGLowLevelTemplate._session

    try:
        # If we can't open the session, kill the existing one,
        # then try to reopen it (up to 5 times...)
        i = 0

        while i < 5:
            if session is not None:
                if not session.openService("//blp/refdata"):
                    logger.info("Try reopening Bloomberg session... try "
                                + str(i))

                    # Need to forcibly kill_session, since we can't always
                    # reopen an existing session
                    self.kill_session(session)
                    session = self.start_bloomberg_session()

                    if session is not None:
                        if session.openService("//blp/refdata"):
                            i = 6
            else:
                logger.info("Try opening Bloomberg session... try " + str(i))

                session = self.start_bloomberg_session()

            i = i + 1

        # Give an error if it still doesn't work after several tries
        if not session.openService("//blp/refdata"):
            logger.error("Failed to open //blp/refdata")
            return

        logger.info("Creating request...")

        eventQueue = blpapi.EventQueue()
        # eventQueue = None

        # Create a request
        from blpapi import CorrelationId
        cid = CorrelationId()

        options = self.fill_options(market_data_request)

        if options.security is not None:
            self.send_bar_request(session, eventQueue, options, cid)
            logger.info("Waiting for data to be returned...")

            data_frame = self.event_loop(session)
        else:
            logger.warning("No ticker or field specified!")

            data_frame = None
    finally:
        # Stop the session (will fail if it's None)
        try:
            session.stop()
        except:
            pass

    return data_frame
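# A hedged calling sketch (it needs a live Bloomberg terminal/connection to
# actually return data). The request object below is a hypothetical
# stand-in exposing only the attributes that fill_options reads further
# down; the real request class is not shown in this excerpt.

from datetime import datetime
from types import SimpleNamespace

# Hypothetical stand-in for the real market data request object
market_data_request = SimpleNamespace(
    tickers=["EURUSD Curncy"],
    fields=["PX_LAST"],
    start_date=datetime(2023, 1, 2),
    finish_date=datetime(2023, 1, 31),
    overrides={})

loader = BBGLowLevelDaily()  # defined below
data_frame = loader.load_time_series(market_data_request)

if data_frame is not None:
    print(data_frame.tail())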
class BBGLowLevelDaily(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    def combine_slices(self, data_frame_cols, data_frame_slice):
        try:
            if (data_frame_slice.columns.get_level_values(1).values[0]
                    not in data_frame_cols):
                # return data_frame.join(data_frame_slice, how="outer")
                return data_frame_slice
        except Exception as e:
            self.logger.warning('Data slice empty ' + str(e))

            return None

        return None

    # Populate options for a Bloomberg daily data request
    def fill_options(self, market_data_request):
        options = OptionsBBG()

        options.security = market_data_request.tickers
        options.startDateTime = market_data_request.start_date
        options.endDateTime = market_data_request.finish_date
        options.fields = market_data_request.fields
        options.overrides = market_data_request.overrides

        return options

    def process_message(self, msg):
        # Process received events
        #
        # SLOW loop (careful: not all the fields will be returned every
        # time, hence the need to include the field name in the tuple)
        # Perhaps try to run in parallel?
        implementation = 'simple'

        if implementation == 'simple':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            data = defaultdict(dict)

            # FASTER: avoid repeatedly calling getValue/getElement methods
            # in blpapi (very slow); better to cache variables
            for i in range(fieldData.numValues()):
                mini_field_data = fieldData.getValue(i)
                date = mini_field_data.getElement(0).getValue()

                for j in range(1, mini_field_data.numElements()):
                    field_value = mini_field_data.getElement(j)

                    data[(str(field_value.name()), ticker)][date] \
                        = field_value.getValue()

            # ORIGINAL: repeated calls to getValue/getElement are much slower
            # for i in range(fieldData.numValues()):
            #     for j in range(1, fieldData.getValue(i).numElements()):
            #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
            #             = fieldData.getValue(i).getElement(j).getValue()
        elif implementation == 'py4j':
            pass

            # TODO Py4J
            # from findatapy.market.bbgloop import bbgloop
            # from py4j.java_gateway import JavaGateway
            # gateway = JavaGateway()
            # data = gateway.entry_point.parseFieldDataArray(msg)
        elif implementation == 'cython':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop import bbgloop
            data = bbgloop(fieldData, ticker)
        elif implementation == 'numba':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop_numba import bbgloop_numba
            data = bbgloop_numba(fieldData, ticker)

        # TODO cython
        data_frame = pandas.DataFrame(data)

        # An obsolete ticker can return no values
        if not data_frame.empty:
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' '
                             + str(data_frame.index[0]) + ' - '
                             + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # Create a request for daily historical data
    def send_bar_request(self, session, eventQueue, options, cid):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request: " + str(request))

        session.sendRequest(request=request, correlationId=cid)
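# To make the data structure in process_message above concrete: the
# (field, ticker) keyed defaultdict of date-to-value dicts turns directly
# into a DataFrame whose columns are the (field, ticker) tuples. A
# self-contained sketch with made-up values:

from collections import defaultdict

import pandas

# Mimic the structure built in process_message: outer keys are
# (field, ticker) tuples, inner dicts map dates to values
data = defaultdict(dict)
data[("PX_LAST", "EURUSD Curncy")]["2023-01-02"] = 1.10
data[("PX_LAST", "EURUSD Curncy")]["2023-01-03"] = 1.11
data[("PX_OPEN", "EURUSD Curncy")]["2023-01-02"] = 1.09

# Tuple keys become (field, ticker) column pairs; dates missing for a
# field show up as NaN. As in process_message, the index is then parsed
# into datetimes
data_frame = pandas.DataFrame(data)
data_frame.index = pandas.to_datetime(data_frame.index)
print(data_frame)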