    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS', "Y")  # include expired contracts
        self.add_override(request, 'START_DT',
                          options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        # Add user defined overrides for BBG request
        self.add_override_dict(request, options)

        logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        logger = LoggerManager().getLogger(__name__)

        data_vals = list(data.values())

        # Each price time point has multiple fields - marginally quicker
        # to unpack them in a single list comprehension
        bar_tuples = [([bar.getElementAsFloat(self.OPEN),
                        bar.getElementAsFloat(self.HIGH),
                        bar.getElementAsFloat(self.LOW),
                        bar.getElementAsFloat(self.CLOSE),
                        bar.getElementAsInteger(self.VOLUME),
                        bar.getElementAsInteger(self.NUM_EVENTS)],
                       bar.getElementAsDatetime(self.TIME))
                      for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_tuples))
        time_list = list(map(itemgetter(1), bar_tuples))

        try:
            logger.info("Dates between " + str(time_list[0]) + " - "
                        + str(time_list[-1]))
        except:
            logger.info("No dates retrieved")
            return None

        # Create pandas DataFrame with the Bloomberg output
        return pd.DataFrame(data=data_table, index=time_list,
                            columns=["open", "high", "low", "close",
                                     "volume", "events"])
    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None,
                               dateparse=None):
        """Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """
        logger = LoggerManager().getLogger(__name__)

        logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff,
                                              dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)
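# Usage sketch (hedged): assuming this method sits on findatapy's IOEngine
# class, converting a daily FX CSV into the HDF5 cache might look like the
# commented lines below; the file path and category are illustrative only,
# and the import path may differ by findatapy version.
#
# from findatapy.market.ioengine import IOEngine
#
# io_engine = IOEngine()
# io_engine.convert_csv_data_frame("fx_daily.csv", category="fx",
#                                  freq="daily")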
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # Only one security/eventType per request
        request.set("security", options.security)
        request.set("eventType", options.event)
        request.set("interval", options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if options.startDateTime is not None \
                and options.endDateTime is not None:
            request.set("startDateTime", options.startDateTime)
            request.set("endDateTime", options.endDateTime)

        if options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        # Add user defined overrides for BBG request
        self.add_override_dict(request, options)

        logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request=request, correlationId=cid)
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS', "Y")  # include expired contracts
        self.add_override(request, 'START_DT',
                          options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        if options.overrides != {}:
            for k in options.overrides.keys():
                new_k = k

                # Map to the vendor override name, if one is defined
                if k in super().convert_override_fields:
                    new_k = super().convert_override_fields[k]

                self.add_override(request, new_k, options.overrides[k])

        logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
    def load_assets(self, br=None):
        ##### FILL IN WITH YOUR ASSET DATA
        from findatapy.util.loggermanager import LoggerManager

        logger = LoggerManager().getLogger(__name__)

        # For FX basket
        full_bkt = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                    'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['FX trend'] = full_bkt

        br = self.load_parameters(br=br)

        logger.info("Loading asset data...")

        vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK',
                          'FRED/DEXUSAL', 'FRED/DEXCAUS', 'FRED/DEXUSNZ',
                          'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

        market_data_request = MarketDataRequest(
            start_date=br.start_date,        # start date
            finish_date=br.finish_date,      # finish date
            freq='daily',                    # daily data
            data_source='quandl',            # use Quandl as data source
            tickers=full_bkt,                # ticker (findatapy)
            fields=['close'],                # which fields to download
            vendor_tickers=vendor_tickers,   # ticker (Quandl)
            vendor_fields=['close'],         # which vendor fields to download
            cache_algo='cache_algo_return')  # how to return data

        asset_df = self.market.fetch_market(market_data_request)

        # If web connection fails read from CSV
        if asset_df is None:
            import datetime
            import pandas

            asset_df = pandas.read_csv(
                "d:/fxcta.csv", index_col=0, parse_dates=['Date'],
                date_parser=lambda x: datetime.datetime.strptime(
                    x, '%Y-%m-%d'))

        # Signalling variables
        spot_df = asset_df
        spot_df2 = None  # asset_df

        return asset_df, spot_df, spot_df2, basket_dict
    def pad_time_series_columns(self, columns, data_frame):
        """Selects time series from a DataFrame and if necessary creates
        empty columns

        Parameters
        ----------
        columns : list(str)
            columns to be included with this keyword
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        old_columns = data_frame.columns.tolist()

        common_columns = [val for val in columns if val in old_columns]
        uncommon_columns = [val for val in columns if val not in old_columns]
        uncommon_columns = [str(x) for x in uncommon_columns]

        data_frame = data_frame[common_columns]

        if len(uncommon_columns) > 0:
            logger = LoggerManager().getLogger(__name__)
            logger.info("Padding missing columns...")  # " + str(uncommon_columns))

            new_data_frame = pd.DataFrame(index=data_frame.index,
                                          columns=uncommon_columns)

            data_frame = pd.concat([data_frame, new_data_frame], axis=1)

            # Force new columns to float NaNs (not objects, which cause
            # problems with newer pandas versions), or to NaT if they are
            # date columns
            for u in uncommon_columns:
                is_date = False

                for c in constants.always_date_columns:
                    if c in u:
                        is_date = True

                if is_date:
                    data_frame[u] = np.datetime64('NaT')
                else:
                    data_frame[u] = np.nan

            # SLOW method below
            # for x in uncommon_columns: data_frame.loc[:,x] = np.nan

        # Get columns in same order again
        data_frame = data_frame[columns]

        return data_frame
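# Usage sketch (hedged): pad_time_series_columns returns the requested
# columns, padding any that are missing with NaN (or NaT for date columns).
# The Filter import path and column names below are illustrative
# assumptions that may differ by findatapy version.
#
# import pandas as pd
# from findatapy.timeseries import Filter
#
# df = pd.DataFrame({'EURUSD.close': [1.10, 1.11]},
#                   index=pd.date_range('2020-01-01', periods=2))
#
# # Keeps EURUSD.close and adds an all-NaN GBPUSD.close column
# padded = Filter().pad_time_series_columns(
#     ['EURUSD.close', 'GBPUSD.close'], df)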
    def kill_session(self, session):
        logger = LoggerManager().getLogger(__name__)

        if session is not None:
            try:
                session.stop()

                logger.info("Stopping session...")
            finally:
                logger.info("Finally stopping session...")

            session = None
    def run_day_of_month_analysis(self, trading_model, resample_freq='B'):
        from finmarketpy.economics.seasonality import Seasonality

        logger = LoggerManager().getLogger(__name__)

        calculations = Calculations()
        seas = Seasonality()
        trading_model.construct_strategy()
        pnl = trading_model.strategy_pnl()

        # Get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = calculations.calculate_returns(pnl).tz_localize(None)
        bus_day = seas.bus_day_of_month_seasonality(
            rets, add_average=True, resample_freq=resample_freq)

        # Get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = calculations.calculate_returns(pnl).tz_localize(None)
        month = seas.monthly_seasonality(rets)

        logger.info("About to plot seasonality...")

        style = Style()

        # Plotting spot over day of month/month of year
        style.color = 'Blues'
        style.scale_factor = trading_model.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY \
            + ' seasonality day of month.png'
        style.html_file_output = self.DUMP_PATH \
            + trading_model.FINAL_STRATEGY + ' seasonality day of month.html'
        style.title = trading_model.FINAL_STRATEGY \
            + ' day of month seasonality'
        style.display_legend = False
        style.color_2_series = [bus_day.columns[-1]]
        style.color_2 = ['red']  # red, pink
        style.linewidth_2 = 4
        style.linewidth_2_series = [bus_day.columns[-1]]
        style.y_axis_2_series = [bus_day.columns[-1]]

        self.chart.plot(bus_day, chart_type='line', style=style)

        style = Style()

        style.scale_factor = trading_model.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY \
            + ' seasonality month of year.png'
        style.html_file_output = self.DUMP_PATH \
            + trading_model.FINAL_STRATEGY + ' seasonality month of year.html'
        style.title = trading_model.FINAL_STRATEGY \
            + ' month of year seasonality'

        self.chart.plot(month, chart_type='line', style=style)

        return month
    def get_daily_data(self, md_request, md_request_vendor):
        logger = LoggerManager().getLogger(__name__)

        data_frame = self.download_daily(md_request_vendor)

        # Convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                logger.info("No tickers returned for...")

                try:
                    logger.info(str(md_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            try:
                fields = self.translate_from_vendor_field(
                    returned_fields, md_request)
            except:
                print('Problem translating vendor field')

            tickers = self.translate_from_vendor_ticker(
                returned_tickers, md_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            # Convert numerical columns to floats and dates to dates (avoids
            # having object columns which can cause issues with later Pandas)
            data_frame = self.force_type_conversion(data_frame)

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

            # Force sorting of index
            try:
                data_frame = data_frame.sort_index()
            except:
                pass

        return data_frame
    def process_message(self, msg):
        constants = DataConstants()

        # Process received events
        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        # perhaps try to run in parallel?
        logger = LoggerManager().getLogger(__name__)

        ticker = msg.getElement('securityData').getElement(
            'security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        data = defaultdict(dict)

        # FASTER avoid calling getValue/getElement methods in blpapi,
        # very slow, better to cache variables
        for i in range(fieldData.numValues()):
            mini_field_data = fieldData.getValue(i)
            date = mini_field_data.getElement(0).getValue()

            for j in range(1, mini_field_data.numElements()):
                field_value = mini_field_data.getElement(j)

                data[(str(field_value.name()), ticker)][date] = \
                    field_value.getValue()

        # ORIGINAL repeatedly calling getValue/getElement, much slower
        # for i in range(fieldData.numValues()):
        #     for j in range(1, fieldData.getValue(i).numElements()):
        #         data[(str(fieldData.getValue(i).getElement(j).name()),
        #               ticker)][fieldData.getValue(i).getElement(0).getValue()] \
        #             = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pd.DataFrame(data)

        # If obsolete ticker, could return no values
        if data_frame.empty:
            return None
        else:
            # data_frame.columns = pd.MultiIndex.from_tuples(data,
            #     names=['field', 'ticker'])
            data_frame.index = pd.to_datetime(data_frame.index)
            logger.info("Read: " + ticker + ' '
                        + str(data_frame.index[0]) + ' - '
                        + str(data_frame.index[-1]))

        return data_frame
    def _run_strategy(self, trading_model, asset_df, spot_df, spot_df2, br,
                      contract_value_df, pretty_portfolio_name):
        logger = LoggerManager().getLogger(__name__)

        logger.info("Calculating... " + str(pretty_portfolio_name))

        signal_df = trading_model.construct_signal(spot_df, spot_df2,
                                                   br.tech_params, br,
                                                   run_in_parallel=False)

        backtest = Backtest()

        backtest.calculate_trading_PnL(br, asset_df, signal_df,
                                       contract_value_df, False)
        ret_stats = backtest.portfolio_pnl_ret_stats()
        stats = str(backtest.portfolio_pnl_desc()[0])

        port = backtest.portfolio_cum().resample('B').mean()
        port.columns = [str(pretty_portfolio_name) + ' ' + stats]

        return port, ret_stats
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
    def write_r_compatible_hdf_dataframe(self, data_frame, fname,
                                         fields=None):
        """Writes a DataFrame to disk as an R-compatible HDF5 file.

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        logger = LoggerManager().getLogger(__name__)

        fname_r = self.get_h5_filename(fname)

        logger.info("About to dump R binary HDF5 - " + fname_r)

        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.tolist()

        # Decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[['Year', 'Month', 'Day', 'Hour',
                                     'Minute', 'Second', 'Millisecond']
                                    + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()
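# Usage sketch (hedged): dumping a small float series for consumption in R.
# The IOEngine instantiation and file name are assumptions for illustration.
# Note the method downcasts to float32 and splits the index into
# Year/Month/Day/... columns, so R only needs to read plain numeric columns.
#
# import pandas
#
# df = pandas.DataFrame({'close': [1.10, 1.11]},
#                       index=pandas.date_range('2020-01-01', periods=2))
# IOEngine().write_r_compatible_hdf_dataframe(df, 'fx_for_r')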
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # Only one security/eventType per request
        request.set("security", options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if options.startDateTime and options.endDateTime:
            request.set("startDateTime", options.startDateTime)
            request.set("endDateTime", options.endDateTime)

        logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request=request, correlationId=cid)
    def clean_csv_file(self, f_name):
        """Cleans up CSV file (removing empty characters) before writing
        back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """
        logger = LoggerManager().getLogger(__name__)

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # Clean file first if dirty
            if data.count('\x00'):
                logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)
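# Usage sketch (hedged): cleaning a CSV in place before reading it, assuming
# an IOEngine-style class exposes both methods (the file name is an
# illustrative assumption).
#
# io_engine = IOEngine()
# io_engine.clean_csv_file("prices.csv")  # strips NUL characters in place
# df = io_engine.read_csv_data_frame("prices.csv", "daily")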
    def start_bloomberg_session(self):
        constants = DataConstants()

        tries = 0

        session = None

        logger = LoggerManager().getLogger(__name__)

        # Try up to 5 times to start a session
        while tries < 5:
            try:
                # Fill SessionOptions
                sessionOptions = blpapi.SessionOptions()
                sessionOptions.setServerHost(constants.bbg_server)
                sessionOptions.setServerPort(constants.bbg_server_port)

                logger.info("Starting Bloomberg session...")

                # Create a Session
                session = blpapi.Session(sessionOptions)

                # Start a Session
                if not session.start():
                    logger.error("Failed to start session.")
                    return

                logger.info("Returning session...")

                tries = 5
            except:
                tries = tries + 1

        # BBGLowLevelTemplate._session = session

        if session is None:
            logger.error("Failed to start session.")
            return

        return session
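# The session start above follows a common "attempt N times, then give up"
# pattern. A minimal, self-contained sketch of that pattern (not part of
# findatapy; the helper name is illustrative):

def retry_call(func, attempts=5):
    """Call func() up to `attempts` times, returning the first result that
    does not raise, or None if every attempt fails."""
    for _ in range(attempts):
        try:
            return func()
        except Exception:
            continue

    return None

# e.g. session = retry_call(start_session_once, attempts=5), where
# start_session_once would wrap the blpapi SessionOptions/Session.start()
# boilerplate and raise on failure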
    def pad_time_series_columns(self, columns, data_frame):
        """Selects time series from a DataFrame and if necessary creates
        empty columns

        Parameters
        ----------
        columns : list(str)
            columns to be included with this keyword
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        old_columns = data_frame.columns

        common_columns = [val for val in columns if val in old_columns]
        uncommon_columns = [val for val in columns if val not in old_columns]
        uncommon_columns = [str(x) for x in uncommon_columns]

        data_frame = data_frame[common_columns]

        if len(uncommon_columns) > 0:
            logger = LoggerManager().getLogger(__name__)
            logger.info("Padding missing columns...")  # " + str(uncommon_columns))

            new_data_frame = pd.DataFrame(index=data_frame.index,
                                          columns=uncommon_columns)

            data_frame = pd.concat([data_frame, new_data_frame], axis=1)

            # SLOW method below
            # for x in uncommon_columns: data_frame.loc[:,x] = np.nan

        # Get columns in same order again
        data_frame = data_frame[columns]

        return data_frame
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)

        logger = LoggerManager().getLogger(__name__)
        # logger.info("Processing tick data for " + str(self._options.security))

        data_vals = data.values()

        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""
        #
        #     # each price time point has multiple fields - marginally quicker
        #     tuple.append(([item.getElementAsFloat(self.VALUE),
        #                    item.getElementAsInteger(self.TICK_SIZE)],
        #                   item.getElementAsDatetime(self.TIME)))

        # Slightly faster this way (note, we are skipping trade & CC fields)
        tick_tuples = [([item.getElementAsFloat(self.VALUE),
                         item.getElementAsInteger(self.TICK_SIZE)],
                        item.getElementAsDatetime(self.TIME))
                       for item in data_vals]

        data_table = list(map(itemgetter(0), tick_tuples))
        time_list = list(map(itemgetter(1), tick_tuples))

        try:
            logger.info("Dates between " + str(time_list[0]) + " - "
                        + str(time_list[-1]))
        except:
            logger.info("No dates retrieved")
            return None

        # Create pandas DataFrame with the Bloomberg output
        return pd.DataFrame(data=data_table, index=time_list,
                            columns=["close", "ticksize"])
    def get_daily_data(self, market_data_request,
                       market_data_request_vendor):
        logger = LoggerManager().getLogger(__name__)

        data_frame = self.download_daily(market_data_request_vendor)

        # Convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                logger.info("No tickers returned for...")

                try:
                    logger.info(str(market_data_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            try:
                fields = self.translate_from_vendor_field(
                    returned_fields, market_data_request)
            except:
                print('Problem translating vendor field')

            tickers = self.translate_from_vendor_ticker(
                returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame
class FXCrossFactory(object):

    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        self.cache = {}

        self.calculations = Calculations()
        self.market_data_generator = market_data_generator

        return

    def flush_cache(self):
        self.cache = {}

    def get_fx_cross_tick(self, start, end, cross, cut="NYC",
                          source="dukascopy",
                          cache_algo='internet_load_return', type='spot',
                          environment='backtest', fields=['bid', 'ask']):

        if isinstance(cross, str):
            cross = [cross]

        market_data_request = MarketDataRequest(
            gran_freq="tick",
            freq_mult=1,
            freq='tick',
            cut=cut,
            fields=['bid', 'ask', 'bidv', 'askv'],
            cache_algo=cache_algo,
            environment=environment,
            start_date=start,
            finish_date=end,
            data_source=source,
            category='fx')

        market_data_generator = self.market_data_generator

        data_frame_agg = None

        for cr in cross:
            if type == 'spot':
                market_data_request.tickers = cr

                cross_vals = market_data_generator.fetch_market_data(
                    market_data_request)

                # If user only wants 'close', calculate it from the
                # bid/ask fields
                if fields == ['close']:
                    cross_vals = cross_vals[
                        [cr + '.bid', cr + '.ask']].mean(axis=1)
                    cross_vals.columns = [cr + '.close']

                if data_frame_agg is None:
                    data_frame_agg = cross_vals
                else:
                    data_frame_agg = data_frame_agg.join(cross_vals,
                                                         how='outer')

        # Strip the NaN elements
        data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg

    def get_fx_cross(self, start, end, cross, cut="NYC", source="bloomberg",
                     freq="intraday", cache_algo='internet_load_return',
                     type='spot', environment='backtest', fields=['close']):

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start, end, cross, cut=cut,
                                          source=source,
                                          cache_algo=cache_algo,
                                          type='spot', fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []
        freq_list = []
        type_list = []

        for cr in cross:
            market_data_request = MarketDataRequest(
                freq_mult=1,
                cut=cut,
                fields=['close'],
                freq=freq,
                cache_algo=cache_algo,
                start_date=start,
                finish_date=end,
                data_source=source,
                environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday
            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # The nature of the operation determines whether we should use the
        # threading or the multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # Most of the time is spent waiting for Bloomberg to return, so
            # we can use threads rather than multiprocessing. Must use the
            # multiprocessing_on_dill library, otherwise objects can't be
            # pickled correctly (note: currently not very stable)
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in \
                DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        # Fudge, issue with multithreading and accessing HDF5 files
        # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #     thread_no = 0

        if thread_no > 0:
            pool = Pool(thread_no)

            # Open the market data downloads in their own threads and
            # return the results
            result = pool.map_async(self._get_individual_fx_cross,
                                    market_data_request_list)
            data_frame_agg = self.calculations.iterative_outer_join(
                result.get())

            # data_frame_agg = self.calculations.pandas_outer_join(result.get())

            # Pool would have already been closed earlier
            # try:
            #     pool.close()
            #     pool.join()
            # except: pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_agg)

        # Strip the NaN elements
        data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg

    def _get_individual_fx_cross(self, market_data_request):
        cr = market_data_request.cross
        type = market_data_request.type
        freq = market_data_request.freq

        base = cr[0:3]
        terms = cr[3:6]

        if type == 'spot':
            # Non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # Download base USD cross
                market_data_request.tickers = base_USD
                market_data_request.category = 'fx'

                if base_USD + '.close' in self.cache:
                    base_vals = self.cache[base_USD + '.close']
                else:
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[base_USD + '.close'] = base_vals

                # Download terms USD cross
                market_data_request.tickers = terms_USD
                market_data_request.category = 'fx'

                if terms_USD + '.close' in self.cache:
                    terms_vals = self.cache[terms_USD + '.close']
                else:
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[terms_USD + '.close'] = terms_vals

                # If quoted USD/base, flip it
                if base_USD[0:3] == 'USD':
                    if 'USD' + base + '.close' in self.cache:
                        base_vals = self.cache['USD' + base + '.close']
                    else:
                        base_vals = 1 / base_vals
                        self.cache['USD' + base + '.close'] = base_vals

                # If quoted USD/terms, flip it
                if terms_USD[0:3] == 'USD':
                    if 'USD' + terms + '.close' in self.cache:
                        terms_vals = self.cache['USD' + terms + '.close']
                    else:
                        terms_vals = 1 / terms_vals
                        self.cache['USD' + terms + '.close'] = terms_vals

                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']

                cross_vals = base_vals.div(terms_vals, axis='index')
                cross_vals.columns = [cr + '.close']

                base_vals.columns = [base_USD + '.close']
                terms_vals.columns = [terms_USD + '.close']
            else:
                # if base == 'USD': non_USD = terms
                # if terms == 'USD': non_USD = base

                correct_cr = self.fxconv.correct_notation(cr)

                market_data_request.tickers = correct_cr
                market_data_request.category = 'fx'

                if correct_cr + '.close' in self.cache:
                    cross_vals = self.cache[correct_cr + '.close']
                else:
                    cross_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[correct_cr + '.close'] = cross_vals

                # Flip if not quoted in market convention
                if correct_cr != cr:
                    if cr + '.close' in self.cache:
                        cross_vals = self.cache[cr + '.close']
                    else:
                        cross_vals = 1 / cross_vals
                        self.cache[cr + '.close'] = cross_vals

                # cross_vals = self.market_data_generator.harvest_time_series(market_data_request)
                cross_vals.columns.names = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'daily':
                # Download base USD cross
                market_data_request.tickers = base + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    x = 0

                # Download terms USD cross
                market_data_request.tickers = terms + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    pass

                base_rets = self.calculations.calculate_returns(base_vals)
                terms_rets = self.calculations.calculate_returns(terms_vals)

                cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # First returns of a time series will be NaN, given we
                # don't know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self.calculations.create_mult_index(cross_rets)
                cross_vals.columns = [cr + '-tot.close']

            elif freq == 'intraday':
                self.logger.info('Total calculated returns for intraday '
                                 'not implemented yet')

                return None

        return cross_vals
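# Usage sketch (hedged): fetching a daily EURJPY close series via the
# factory. The MarketDataGenerator wiring is an assumption for illustration;
# constructor arguments and import paths may differ by findatapy version.
#
# from findatapy.market import MarketDataGenerator
#
# fx_factory = FXCrossFactory(market_data_generator=MarketDataGenerator())
# eurjpy = fx_factory.get_fx_cross('01 Jan 2020', '01 Jun 2020', 'EURJPY',
#                                  source='bloomberg', freq='daily')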
    def load_time_series(self, md_request):
        # if (BBGLowLevelTemplate._session is None):
        logger = LoggerManager().getLogger(__name__)

        session = self.start_bloomberg_session()

        # else:
        #     session = BBGLowLevelTemplate._session

        def download_data_frame(sess, eventQ, opt, ci):
            if opt.security is not None:
                self.send_bar_request(sess, eventQ, opt, ci)

                logger.info("Waiting for data to be returned...")

                return self.event_loop(sess)
            else:
                logger.warning("No ticker or field specified!")

                return None

        try:
            # If we can't open the session, kill the existing one,
            # then try to reopen it (up to 5 times...)
            i = 0

            while i < 5:
                if session is not None:
                    if not session.openService("//blp/refdata"):
                        logger.info("Try reopening Bloomberg session... try "
                                    + str(i))

                        # Need to forcibly kill_session, since we can't
                        # always reopen
                        self.kill_session(session)
                        session = self.start_bloomberg_session()

                        if session is not None:
                            if session.openService("//blp/refdata"):
                                i = 6
                else:
                    logger.info("Try opening Bloomberg session... try "
                                + str(i))

                    session = self.start_bloomberg_session()

                i = i + 1

            # Give an error if it still doesn't work after several tries...
            if not session.openService("//blp/refdata"):
                logger.error("Failed to open //blp/refdata")

                return

            logger.info("Creating request...")

            eventQueue = blpapi.EventQueue()
            # eventQueue = None

            # Create a request
            from blpapi import CorrelationId

            options = self.fill_options(md_request)

            # In some instances we might split the options if we need to
            # have different overrides
            if isinstance(options, list):
                data_frame_list = []

                for op in options:
                    cid = CorrelationId()
                    data_frame_list.append(
                        download_data_frame(session, eventQueue, op, cid))

                data_frame = Calculations().join(data_frame_list)
            else:
                cid = CorrelationId()
                data_frame = download_data_frame(session, eventQueue,
                                                 options, cid)
        finally:
            # Stop the session (will fail if NoneType)
            try:
                session.stop()
            except:
                pass

        return data_frame
    def load_ticker(self, md_request):
        """Retrieves market data from external data source (in this case
        Bloomberg)

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        constants = DataConstants()

        md_request = MarketDataRequest(md_request=md_request)
        md_request_vendor = self.construct_vendor_md_request(md_request)

        data_frame = None

        logger = LoggerManager().getLogger(__name__)
        logger.info("Request Bloomberg data")

        # Do we need daily or intraday data?
        if md_request.freq in ['daily', 'weekly', 'monthly', 'quarterly',
                               'yearly']:

            # Work out the fields which need to be downloaded via Bloomberg
            # ref request (BDP) and those that can be downloaded via
            # Historical request (BDH)
            ref_fields = []
            ref_vendor_fields = []

            # Get user defined list of BBG fields/vendor fields which need
            # to be downloaded by BDP
            bbg_ref_fields = list(constants.bbg_ref_fields.keys())
            bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] in bbg_ref_fields \
                        or md_request_vendor.fields[i] \
                        in bbg_ref_vendor_fields:
                    ref_fields.append(md_request.fields[i])
                    ref_vendor_fields.append(md_request_vendor.fields[i])

            non_ref_fields = []
            non_ref_vendor_fields = []

            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] not in bbg_ref_fields \
                        and md_request_vendor.fields[i] \
                        not in bbg_ref_vendor_fields:
                    non_ref_fields.append(md_request.fields[i])
                    non_ref_vendor_fields.append(md_request_vendor.fields[i])

            # For certain cases, we need to use ReferenceDataRequest,
            # eg. for event times/dates, last tradeable date fields (when
            # specified)
            if len(ref_fields) > 0:
                # Careful: make sure you copy the market data request
                # object (when threading, altering that can cause
                # concurrency issues!)
                old_fields = copy.deepcopy(md_request.fields)
                old_vendor_fields = copy.deepcopy(md_request_vendor.fields)

                # md_request = MarketDataRequest(md_request=md_request_copy)

                md_request.fields = ref_fields
                md_request.vendor_fields = ref_vendor_fields
                md_request_vendor = self.construct_vendor_md_request(
                    md_request)

                # Just select those reference fields to download via
                # reference
                datetime_data_frame = self.get_reference_data(
                    md_request_vendor, md_request)

                # Download all the other event or non-ref fields (uses
                # HistoricalDataRequest to Bloomberg), then concatenate
                # with the date time fields
                if len(non_ref_fields) > 0:
                    md_request.fields = non_ref_fields
                    md_request.vendor_fields = non_ref_vendor_fields
                    md_request_vendor = self.construct_vendor_md_request(
                        md_request)

                    events_data_frame = self.get_daily_data(
                        md_request, md_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pd.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                md_request.fields = copy.deepcopy(old_fields)
                md_request_vendor.fields = copy.deepcopy(old_vendor_fields)

            # For all other daily/monthly/quarterly data, we can use
            # HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(md_request,
                                                 md_request_vendor)

                # if data_frame is not None:
                #     # Convert fields with release-dt to dates (special
                #     # case!) and assume everything else is numerical
                #     for c in data_frame.columns:
                #         try:
                #             if 'release-dt' in c:
                #                 data_frame[c] = (data_frame[c]).astype('int').astype(str).apply(
                #                     lambda x: pd.to_datetime(x, format='%Y%m%d'))
                #             else:
                #                 data_frame[c] = pd.to_numeric(data_frame[c])
                #         except:
                #             pass

        # Assume one ticker only for intraday data and use
        # IntradayDataRequest to Bloomberg
        if md_request.freq in ['tick', 'intraday', 'second', 'minute',
                               'hourly']:
            md_request_vendor.tickers = md_request_vendor.tickers[0]

            if md_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(md_request_vendor)
            else:
                data_frame = self.download_intraday(md_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        logger.info("No tickers returned for: "
                                    + md_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = md_request.tickers[0] + "." + cols
                data_frame.columns = cols

        logger.info("Completed request from Bloomberg.")

        return data_frame
    def process_message(self, msg):
        logger = LoggerManager().getLogger(__name__)

        data = collections.defaultdict(dict)

        # Process received events
        securityDataArray = msg.getElement('securityData')

        index = 0
        single = False

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    logger.error(field_name + " is NULL")
                elif field.isArray():
                    # Iterate over complex data returns
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        try:
                            field_val = re.findall(r'"(.*?)"', "%s" % row)[0]
                        except:
                            e = row.getElement(0)
                            # k = str(e.name())
                            field_val = e.getValue()

                        data[(field_name, ticker)][index] = field_val

                        index = index + 1
                else:
                    field_name = "%s" % field.name()
                    data[(field_name, ticker)][0] = field.getValueAsString()

                    index = index + 1

                    # Single row, so no need to create a MultiIndex later
                    # CAREFUL!! needed for futures expiries
                    single = True

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")

                print(errorInfo.getElementAsString("category"), ":",
                      fieldException.getElementAsString("fieldId"))

        # Explicitly use from_dict (creating pd.DataFrame(data) directly
        # can be buggy here)
        data_frame = pd.DataFrame.from_dict(data)

        # If obsolete ticker, could return no values
        if data_frame.empty:
            return None
        else:
            logger.info("Reading: " + ticker + ' '
                        + str(data_frame.index[0]) + ' - '
                        + str(data_frame.index[-1]))

        return data_frame
# Have vol target for each signal
br.signal_vol_adjust = True
br.signal_vol_target = 0.05
br.signal_vol_max_leverage = 3
br.signal_vol_periods = 60
br.signal_vol_obs_in_year = 252
br.signal_vol_rebalance_freq = 'BM'
br.signal_vol_resample_freq = None

tech_params = TechParams()
tech_params.sma_period = 200
indicator = 'SMA'

# Pick USD crosses in G10 FX
# Note: we are calculating returns from spot (it is much better to use
# total return indices for FX, which include carry)
logger.info("Loading asset data...")

tickers = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
           'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK',
                  'FRED/DEXUSAL', 'FRED/DEXCAUS', 'FRED/DEXUSNZ',
                  'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

md_request = MarketDataRequest(
    start_date="01 Jan 1989",           # start date
    finish_date=datetime.date.today(),  # finish date
    freq='daily',                       # daily data
    data_source='quandl',               # use Quandl as data source
    tickers=tickers,                    # ticker (findatapy)
    fields=['close'],                   # which fields to download
    vendor_tickers=vendor_tickers,      # ticker (Quandl)
    vendor_fields=['close'])            # which vendor fields to download
class TradeAnalysis(object):
    """Applies some basic trade analysis for a trading strategy (as defined
    by TradingModel). Uses PyFolio to create some basic trading statistics.
    Also allows you to test multiple parameters for a specific strategy
    (like TC).
    """

    def __init__(self, engine=ChartConstants().chartfactory_default_engine):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' \
            + datetime.date.today().strftime("%Y%m%d") + ' '
        self.SCALE_FACTOR = 3
        self.DEFAULT_PLOT_ENGINE = engine
        self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE)

        return

    def run_strategy_returns_stats(self, trading_model, index=None,
                                   engine='pyfolio'):
        """Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        trading_model : TradingModel
            defining trading strategy
        index : DataFrame
            define strategy by a time series
        """

        if index is None:
            pnl = trading_model.get_strategy_pnl()
        else:
            pnl = index

        tz = Timezone()
        calculations = Calculations()

        if engine == 'pyfolio':
            # PyFolio assumes UTC time based DataFrames (so force this
            # localisation)
            try:
                pnl = tz.localise_index_as_UTC(pnl)
            except:
                pass

            # Set the matplotlib style sheet & defaults; at present this
            # only works in the Matplotlib engine
            try:
                matplotlib.rcdefaults()
                plt.style.use(ChartConstants().chartfactory_style_sheet[
                    'chartpy-pyfolio'])
            except:
                pass

            # TODO for intraday strategies, make daily

            # Convert DataFrame (assumed to have only one column) to Series
            pnl = calculations.calculate_returns(pnl)
            pnl = pnl.dropna()
            pnl = pnl[pnl.columns[0]]

            fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

            try:
                plt.savefig(trading_model.DUMP_PATH + "stats.png")
            except:
                pass

            plt.show()
        elif engine == 'finmarketpy':
            # Assume we have a TradingModel
            # TODO take in a time series
            from chartpy import Canvas, Chart

            # Plot the final strategy
            pnl = trading_model.plot_strategy_pnl(silent_plot=True)

            # Plot the individual trade P&Ls
            individual = trading_model.plot_strategy_group_pnl_trades(
                silent_plot=True)

            # Plot all the cumulative P&Ls of each component
            pnl_comp = trading_model.plot_strategy_group_benchmark_pnl(
                silent_plot=True)

            # Plot all the IRs of each component
            ir_comp = trading_model.plot_strategy_group_benchmark_pnl_ir(
                silent_plot=True)

            # Plot the leverage of the portfolio
            leverage = trading_model.plot_strategy_leverage(silent_plot=True)

            # Plot all the individual leverages
            ind_lev = trading_model.plot_strategy_group_leverage(
                silent_plot=True)

            canvas = Canvas([[pnl, individual],
                             [pnl_comp, ir_comp],
                             [leverage, ind_lev]])

            canvas.generate_canvas(silent_display=False,
                                   canvas_plotter='plain')

    def run_excel_trade_report(self, trading_model, excel_file='model.xlsx'):
        """Creates an Excel spreadsheet with model returns and latest trades

        Parameters
        ----------
        trading_model : TradingModel
            defining trading strategy (can be a list)
        """
        trading_model_list = trading_model

        if not isinstance(trading_model_list, list):
            trading_model_list = [trading_model]

        writer = pandas.ExcelWriter(excel_file, engine='xlsxwriter')

        for tm in trading_model_list:
            strategy_name = tm.FINAL_STRATEGY

            returns = tm.get_strategy_group_benchmark_pnl()
            returns.to_excel(writer, sheet_name=strategy_name + ' rets',
                             engine='xlsxwriter')

            # Write raw position/trade sizes
            self.save_positions_trades(tm, tm.get_strategy_signal(),
                                       tm.get_strategy_trade(),
                                       'pos', 'trades', writer)

            if hasattr(tm, '_strategy_signal_notional'):
                # Write position/trade sizes scaled by notional
                self.save_positions_trades(
                    tm, tm.get_strategy_signal_notional(),
                    tm.get_strategy_trade_notional(),
                    'pos - Not', 'trades - Not', writer)

            if hasattr(tm, '_strategy_signal_contracts'):
                # Write position/trade sizes in terms of contract sizes
                self.save_positions_trades(
                    tm, tm.get_strategy_signal_contracts(),
                    tm.get_strategy_trade_contracts(),
                    'pos - Cont', 'trades - Cont', writer)

        # TODO Add summary sheet comparing return statistics for all the
        # different models in the list

        writer.save()
        writer.close()

    def save_positions_trades(self, tm, signals, trades, signal_caption,
                              trade_caption, writer):
        signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' hist '
                         + signal_caption, engine='xlsxwriter')

        # Default if the model doesn't define STRIP (assumption: grab_signals
        # accepts strip=None)
        strip = None

        if hasattr(tm, 'STRIP'):
            strip = tm.STRIP

        recent_signals = tm.grab_signals(signals,
                                         date=[-1, -2, -5, -10, -20],
                                         strip=strip)
        recent_trades = tm.grab_signals(trades,
                                        date=[-1, -2, -5, -10, -20],
                                        strip=strip)

        recent_signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' '
                                + signal_caption, engine='xlsxwriter')
        recent_trades.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' '
                               + trade_caption, engine='xlsxwriter')

    def run_tc_shock(self, strategy, tc=None):
        if tc is None:
            tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp': x} for x in tc]

        # Names of the portfolios
        pretty_portfolio_names = [str(x) + 'bp' for x in tc]

        # Broad type of parameter name
        parameter_type = 'TC analysis'

        return self.run_arbitrary_sensitivity(
            strategy,
            parameter_list=parameter_list,
            pretty_portfolio_names=pretty_portfolio_names,
            parameter_type=parameter_type)

    ###### Parameters and signal generations (need to be customised for
    ###### every model)
    def run_arbitrary_sensitivity(self, trading_model, parameter_list=None,
                                  parameter_names=None,
                                  pretty_portfolio_names=None,
                                  parameter_type=None):

        asset_df, spot_df, spot_df2, basket_dict = \
            trading_model.load_assets()

        port_list = None
        ret_stats_list = []

        for i in range(0, len(parameter_list)):
            br = trading_model.load_parameters()

            current_parameter = parameter_list[i]

            # For calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            trading_model.br = br  # for calculating signals

            signal_df = trading_model.construct_signal(spot_df, spot_df2,
                                                       br.tech_params, br)

            backtest = Backtest()

            self.logger.info("Calculating... "
                             + str(pretty_portfolio_names[i]))

            backtest.calculate_trading_PnL(br, asset_df, signal_df)
            ret_stats_list.append(backtest.get_portfolio_pnl_ret_stats())
            stats = str(backtest.get_portfolio_pnl_desc()[0])

            port = backtest.get_cumportfolio().resample('B').mean()
            port.columns = [str(pretty_portfolio_names[i]) + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # Reset the parameters of the strategy
        trading_model.br = trading_model.load_parameters()

        style = Style()

        ir = [t.inforatio()[0] for t in ret_stats_list]

        # If we have too many combinations, remove the legend and use a
        # scaled shaded colour
        # if len(port_list) > 10:
        #     style.color = 'Blues'
        #     style.display_legend = False

        # Plot all the variations
        style.resample = 'B'
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY \
            + ' ' + parameter_type + '.png'
        style.html_file_output = self.DUMP_PATH \
            + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.html'
        style.scale_factor = self.SCALE_FACTOR
        style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type

        self.chart.plot(port_list, chart_type='line', style=style)

        # Plot all the IRs in bar chart form (can be easier to read!)
        style = Style()
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY \
            + ' ' + parameter_type + ' IR.png'
        style.html_file_output = self.DUMP_PATH \
            + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.html'
        style.scale_factor = self.SCALE_FACTOR
        style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type

        summary = pandas.DataFrame(index=pretty_portfolio_names, data=ir,
                                   columns=['IR'])

        self.chart.plot(summary, chart_type='bar', style=style)

        return port_list

    ###### Parameters and signal generations (need to be customised for
    ###### every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self, trading_model,
                                             parameter_list=None,
                                             pretty_portfolio_names=None,
                                             strip=None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = trading_model.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = trading_model.fill_backtest_request()

            current_parameter = parameter_list[i]

            # For calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            trading_model.FINAL_STRATEGY = final_strategy + " " \
                + pretty_portfolio_names[i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])

            trading_model.br = br
            trading_model.construct_strategy(br=br)

            trading_model.plot_strategy_pnl()
            trading_model.plot_strategy_leverage()
            trading_model.plot_strategy_group_benchmark_pnl(strip=strip)

        # Reset the parameters of the strategy
        trading_model.br = trading_model.fill_backtest_request()
        trading_model.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, trading_model):
        from finmarketpy.economics.seasonality import Seasonality

        calculations = Calculations()
        seas = Seasonality()
        trading_model.construct_strategy()
        pnl = trading_model.get_strategy_pnl()

        # Get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = calculations.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average=True)

        # Get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = calculations.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        style = Style()

        # Plotting spot over day of month/month of year
        style.color = 'Blues'
        style.scale_factor = self.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY \
            + ' seasonality day of month.png'
        style.html_file_output = self.DUMP_PATH \
            + trading_model.FINAL_STRATEGY + ' seasonality day of month.html'
        style.title = trading_model.FINAL_STRATEGY \
            + ' day of month seasonality'
        style.display_legend = False
        style.color_2_series = [bus_day.columns[-1]]
        style.color_2 = ['red']  # red, pink
        style.linewidth_2 = 4
        style.linewidth_2_series = [bus_day.columns[-1]]
        style.y_axis_2_series = [bus_day.columns[-1]]

        self.chart.plot(bus_day, chart_type='line', style=style)

        style = Style()
        style.scale_factor = self.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY \
            + ' seasonality month of year.png'
        style.html_file_output = self.DUMP_PATH \
            + trading_model.FINAL_STRATEGY + ' seasonality month of year.html'
        style.title = trading_model.FINAL_STRATEGY \
            + ' month of year seasonality'

        self.chart.plot(month, chart_type='line', style=style)

        return month
class BBGLowLevelTick(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelTick, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # Constants
        self.TICK_DATA = blpapi.Name("tickData")
        self.COND_CODE = blpapi.Name("conditionCodes")
        self.TICK_SIZE = blpapi.Name("size")
        self.TIME = blpapi.Name("time")
        self.TYPE = blpapi.Name("type")
        self.VALUE = blpapi.Name("value")
        self.RESPONSE_ERROR = blpapi.Name("responseError")
        self.CATEGORY = blpapi.Name("category")
        self.MESSAGE = blpapi.Name("message")
        self.SESSION_TERMINATED = blpapi.Name("SessionTerminated")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # Populate options for Bloomberg request for asset intraday request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers[0]  # get 1st ticker only!
        self._options.event = market_data_request.trade_side.upper()
        # self._options.barInterval = market_data_request.freq_mult
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        # self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = \
                self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = \
                self._options.endDateTime.replace(microsecond=0)

        return self._options

    # Iterate through Bloomberg output, creating a DataFrame output;
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)

        self.logger.info("Processing tick data for "
                         + str(self._options.security))

        data_vals = data.values()

        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""
        #
        #     # each price time point has multiple fields - marginally quicker
        #     tuple.append(([item.getElementAsFloat(self.VALUE),
        #                    item.getElementAsInteger(self.TICK_SIZE)],
        #                   item.getElementAsDatetime(self.TIME)))

        # Slightly faster this way (note, we are skipping trade & CC fields)
        tick_tuples = [([item.getElementAsFloat(self.VALUE),
                         item.getElementAsInteger(self.TICK_SIZE)],
                        item.getElementAsDatetime(self.TIME))
                       for item in data_vals]

        data_table = list(map(itemgetter(0), tick_tuples))
        time_list = list(map(itemgetter(1), tick_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - "
                             + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # Create pandas DataFrame with the Bloomberg output
        return pandas.DataFrame(data=data_table, index=time_list,
                                columns=['close', 'ticksize'])

    # Implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # Only one security/eventType per request
        request.set("security", self._options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        self.logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request)
class BBGLowLevelIntraday(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # Constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # Populate options for Bloomberg request for asset intraday request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers[0]  # get 1st ticker only!
        self._options.event = market_data_request.trade_side.upper()
        self._options.barInterval = market_data_request.freq_mult
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = \
                self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = \
                self._options.endDateTime.replace(microsecond=0)

        return self._options

    # Iterate through Bloomberg output, creating a DataFrame output;
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        self.logger.info("Processing intraday data for "
                         + str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pandas.DataFrame(data = data_matrix, index = date_index,
        #     columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## The for loop method is a touch slower
        # time_list = []
        # data_table = []
        #
        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                        bar.getElementAsFloat(self.HIGH),
        #                        bar.getElementAsFloat(self.LOW),
        #                        bar.getElementAsFloat(self.CLOSE),
        #                        bar.getElementAsInteger(self.VOLUME),
        #                        bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # Each price time point has multiple fields - marginally quicker
        bar_tuples = [([bar.getElementAsFloat(self.OPEN),
                        bar.getElementAsFloat(self.HIGH),
                        bar.getElementAsFloat(self.LOW),
                        bar.getElementAsFloat(self.CLOSE),
                        bar.getElementAsInteger(self.VOLUME),
                        bar.getElementAsInteger(self.NUM_EVENTS)],
                       bar.getElementAsDatetime(self.TIME))
                      for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_tuples))
        time_list = list(map(itemgetter(1), bar_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - "
                             + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # Create pandas DataFrame with the Bloomberg output
        return pandas.DataFrame(data=data_table, index=time_list,
                                columns=['open', 'high', 'low', 'close',
                                         'volume', 'events'])

    # Implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # Only one security/eventType per request
        request.set("security", self._options.security)
        request.set("eventType", self._options.event)
        request.set("interval", self._options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        if self._options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request)
class BBGLowLevelRef(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # Populate options for Bloomberg request for asset intraday request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        self._options.fields = market_data_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # Process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # Iterate over complex data returns
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        data[(field_name, ticker)][index] = \
                            re.findall(r'"(.*?)"', "%s" % row)[0]

                        index = index + 1

                # else:
                #     vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                #     print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")

                print(errorInfo.getElementAsString("category"), ":",
                      fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # If obsolete ticker, could return no values
        if not data_frame.empty:
            data_frame.columns = pandas.MultiIndex.from_tuples(
                data, names=['field', 'ticker'])

            self.logger.info("Reading: " + ticker + ' '
                             + str(data_frame.index[0]) + ' - '
                             + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if data_frame_slice.columns.get_level_values(1).values[0] \
                not in data_frame.columns.get_level_values(1).values:
            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # Create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'START_DT',
                          self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          self._options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request)
class BBGLowLevelDaily(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    def combine_slices(self, data_frame, data_frame_slice):
        if data_frame_slice.columns.get_level_values(1).values[0] not in data_frame.columns.get_level_values(1).values:
            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # populate options for Bloomberg request for asset daily request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        self._options.fields = market_data_request.fields

        return self._options

    def process_message(self, msg):
        # process received events
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        # SLOW loop (careful: not all the fields will be returned every time,
        # hence we need to include the field name in the tuple key)
        data = defaultdict(dict)

        for i in range(fieldData.numValues()):
            for j in range(1, fieldData.getValue(i).numElements()):
                data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
                    = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pandas.DataFrame(data)

        # an obsolete ticker can return no values
        if not data_frame.empty:
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", self._options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request)
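# --- hedged example (not part of the original API) ---------------------------
# The "SLOW loop" above accumulates {(field, ticker): {date: value}}; because
# Bloomberg may omit a field on some dates, pandas fills the holes with NaN
# when the dict becomes a DataFrame. A standalone sketch with made-up values:
def _example_daily_fielddata_to_dataframe():
    import datetime
    from collections import defaultdict

    import pandas

    data = defaultdict(dict)
    data[('PX_LAST', 'EURUSD Curncy')][datetime.date(2020, 1, 1)] = 1.12
    data[('PX_LAST', 'EURUSD Curncy')][datetime.date(2020, 1, 2)] = 1.13
    data[('PX_VOLUME', 'EURUSD Curncy')][datetime.date(2020, 1, 2)] = 1000.0    # missing on 1 Jan

    data_frame = pandas.DataFrame(data)
    data_frame.index = pandas.to_datetime(data_frame.index)

    return data_frame    # the PX_VOLUME column holds NaN on 1 Jan 2020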
class IOEngine(object):
    """Writes and reads time series data to/from disk in various formats: CSV, HDF5 (fixed and table formats)
    and MongoDB/Arctic.

    Can be used to save down output of finmarketpy backtests and also to cache market data locally.

    Also supports bcolz (but not currently stable). Planning to add other interfaces such as SQL etc.

    """

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """Writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in Excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if create_new:
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                # xlsxwriter cannot edit an existing workbook, so when appending we need
                # the openpyxl engine with the loaded workbook attached
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet)

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """Writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            file handle to use for writing Excel file to disk
        sheet : str
            sheet in Excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet)

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None,
                              postfix='.close', intraday_tz='UTC'):
        """Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        excel_sheet : str
            Excel sheet to be read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str
            timezone of file if uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse,
                                        postfix=postfix, intraday_tz=intraday_tz, excel_sheet=excel_sheet)

    def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1', db_port='6379',
                                         timeout=10, username=None, password=None):
        """Removes a cached time series from disk/database, for whichever engine holds it"""

        if 'hdf5' in engine:
            engine = 'hdf5'

        if engine == 'bcolz':
            # convert invalid characters to substitutes (which bcolz can't deal with)
            pass
        elif engine == 'redis':
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if fname == 'flush_all_keys':
                    r.flushall()
                else:
                    # allow deletion of keys by pattern matching
                    if "*" in fname:
                        x = r.keys(fname)

                        if len(x) > 0:
                            # r.delete takes each key as a separate argument
                            r.delete(*x)

                    r.delete(fname)

            except Exception as e:
                self.logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e))

        elif engine == 'arctic':
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)
            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            store.delete_library(fname)

            c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif engine == 'hdf5':
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame,
                                        engine='hdf5_fixed', append_data=False, db_server=DataConstants().db_server,
                                        db_port=DataConstants().db_port, username=None, password=None,
                                        filter_out_matching=None, timeout=10):
        """Writes Pandas data frame to disk as HDF5, bcolz or Parquet format, or into Arctic/Redis

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this
            'hdf5_table' - use HDF5 table format, slower but can append to
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for Arctic (default: '127.0.0.1')
        db_port : str
            Database port for Arctic/Redis
        username : str (optional)
            Database username
        password : str (optional)
            Database password
        timeout : int
            Number of seconds before timeout
        """

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if engine == 'bcolz':
            # convert invalid characters to substitutes (which bcolz can't deal with)
            data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)

            shutil.rmtree(bcolzpath, ignore_errors=True)

            bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif engine == 'redis':
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if isinstance(data_frame, pandas.DataFrame):
                    r.set(fname, data_frame.to_msgpack(compress='blosc'))

                self.logger.info("Pushed " + fname + " to Redis")
            except Exception as e:
                self.logger.warning("Couldn't push " + fname + " to Redis: " + str(e))

        elif engine == 'arctic':
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                self.logger.info("Created MongoDB library: " + fname)
            else:
                self.logger.info("Got MongoDB library: " + fname)

            # access the library
            library = store[fname]

            if 'intraday' in fname:
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            self.logger.info("Written MongoDB library: " + fname)

        elif engine == 'hdf5':
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is much slower than fixed format)
            # removes duplicated entries at the end
            if append_data:
                store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="blosc", complevel=9)

                if 'intraday' in fname:
                    data_frame = data_frame.astype('float32')

                # get last row which matches and remove everything after that (because the append
                # function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while i > 0:
                    read_index = store.select('data', start=i, stop=nrows).index[0]

                    if read_index <= last_point:
                        break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data', value=data_frame, format=hdf5_format, append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp, format=hdf5_format, complib="blosc", complevel=9)

                if 'intraday' in fname:
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # only rename to the final name once written to disk
                os.rename(h5_filename_temp, h5_filename)

            self.logger.info("Written HDF5: " + fname)

        elif engine == 'parquet':
            if fname[-5:] != '.gzip':
                fname = fname + '.gzip'

            data_frame.to_parquet(fname, compression='gzip')

            self.logger.info("Written Parquet: " + fname)

    def get_h5_filename(self, fname):
        """Adds '.h5' extension to a filename, if it is not already present

        Parameters
        ----------
        fname : str
            filename (with or without '.h5' at the end)

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """Adds '.bcolz' extension to a filename, if it is not already present

        Parameters
        ----------
        fname : str
            filename (with or without '.bcolz' at the end)

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """Writes a DataFrame to disk as an R compatible HDF5 file.
        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """

        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)

        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.values

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None,
                                         db_server=DataConstants().db_server, db_port=DataConstants().db_port,
                                         username=None, password=None):
        """Reads time series cache from disk, in HDF5, bcolz, Parquet, Redis or Arctic/MongoDB format

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hdf5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
            'redis' - reads from Redis
        start_date : str/datetime (optional)
            start date
        finish_date : str/datetime (optional)
            finish date
        db_server : str
            IP address of MongoDB (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager().getLogger(__name__)

        data_frame_list = []

        if not isinstance(fname, list):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            # default to None, so a failed read does not re-append the previous iteration's frame
            data_frame = None

            if engine == 'bcolz':
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif engine == 'redis':
                import redis

                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                    msg = r.get(fname_single)
                except:
                    logger.info("Cache not existent for " + fname_single + " in Redis")

                if msg is None:
                    data_frame = None
                else:
                    logger.info('Load Redis cache: ' + fname_single)

                    data_frame = pandas.read_msgpack(msg)

            elif engine == 'arctic':
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                        connect=False)  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

                store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS)

                # access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)
                    else:
                        from arctic.date import DateRange
                        item = library.read(fname_single, date_range=DateRange(start_date, finish_date))

                    c.close()

                    logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    logger.warning('Library does not exist: ' + fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if 'intraday' in fname_single:
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None,
                            postfix='.close', intraday_tz='UTC', excel_sheet=None):
        """Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str (optional)
            timezone of file if uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if freq == 'intraday':

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':   # use == (not 'is') for string comparison
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True, date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:
                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates='coerce')
            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"], date_parser=dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["Date"], date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

        # convert index to Python datetime; slower methods kept for reference:
        # data_frame['Date1'] = data_frame.index
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                                       int(x[12:13]), int(x[15:16]), int(x[18:19])))
        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')
        # data_frame.index = pandas.to_datetime(data_frame.index)

        if freq == 'intraday':
            # assume time series are already in UTC and assign this (can
            # specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if isinstance(cutoff, str):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):
        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        """Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename

    # TODO refactor IOEngine so that each database is implemented in a subclass of DBEngine
    def get_engine(self, engine='hdf5_fixed'):
        pass
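# --- hedged example (not part of the original API) ---------------------------
# A typical round trip through IOEngine: cache a DataFrame to HDF5 on disk and
# read it back. The file name and ticker column are hypothetical; engines such
# as 'arctic' or 'redis' would need the corresponding server details instead.
def _example_ioengine_round_trip():
    import pandas

    io_engine = IOEngine()

    data_frame = pandas.DataFrame({'EURUSD.close': [1.10, 1.11]},
                                  index=pandas.to_datetime(['2020-01-01', '2020-01-02']))

    # writes example_cache.h5 (get_h5_filename appends the extension)
    io_engine.write_time_series_cache_to_disk('example_cache', data_frame, engine='hdf5_fixed')

    return io_engine.read_time_series_cache_from_disk('example_cache', engine='hdf5')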
class DataVendorBBG(DataVendor):

    def __init__(self):
        super(DataVendorBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """Retrieves market data from an external data source (in this case Bloomberg)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        data_frame = None

        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if market_data_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']:

            # event times/dates need a separate ReferenceDataRequest (when specified)
            if 'release-date-time-full' in market_data_request.fields:
                # experimental!!
                # careful: make sure you copy the market data request object (when threading, altering it can
                # cause concurrency issues!)
                datetime_data_frame = self.get_reference_data(market_data_request_vendor, market_data_request)

                old_fields = copy.deepcopy(market_data_request.fields)
                old_vendor_fields = copy.deepcopy(market_data_request_vendor.fields)

                # remove the field 'release-date-time-full' from our request (and the associated field in the
                # vendor request), if present
                try:
                    index = market_data_request.fields.index('release-date-time-full')

                    market_data_request.fields.pop(index)
                    market_data_request_vendor.fields.pop(index)
                except ValueError:
                    pass

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # and concatenate with the date time fields
                if len(market_data_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(market_data_request, market_data_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop=False)

                    data_frame = pandas.concat([events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                market_data_request.fields = old_fields
                market_data_request_vendor.fields = old_vendor_fields

            # for all other daily/monthly/quarterly data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(market_data_request, market_data_request_vendor)

        # for intraday data we use IntradayDataRequest to Bloomberg
        if market_data_request.freq in ['tick', 'intraday', 'second', 'minute', 'hourly']:

            # assume one ticker only
            market_data_request_vendor.tickers = market_data_request_vendor.tickers[0]

            if market_data_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(market_data_request_vendor)
            else:
                data_frame = self.download_intraday(market_data_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        self.logger.info("No tickers returned for: " + str(market_data_request_vendor.tickers))
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = market_data_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, market_data_request, market_data_request_vendor):
        data_frame = self.download_daily(market_data_request_vendor)

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(market_data_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, market_data_request_vendor, market_data_request):
        from datetime import timedelta

        # request a year beyond today, so future release dates are also captured
        end = datetime.datetime.utcnow() + timedelta(days=365)

        market_data_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " + market_data_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(market_data_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # TODO 'coerce' will be deprecated from pandas
            data_frame = data_frame.convert_objects(convert_dates='coerce', convert_numeric='coerce')

        return data_frame

    # methods to be implemented by the Bloomberg-specific subclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_tick(self, market_data_request):
        return

    @abc.abstractmethod
    def download_intraday(self, market_data_request):
        return

    @abc.abstractmethod
    def download_daily(self, market_data_request):
        return

    @abc.abstractmethod
    def download_ref(self, market_data_request):
        return
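# --- hedged example (not part of the original API) ---------------------------
# DataVendorBBG leaves session handling and the actual downloads abstract; a
# concrete implementation only has to supply the pieces below. The class name
# and bodies here are hypothetical placeholders, not the library's real subclass.
class _ExampleDataVendorBBG(DataVendorBBG):
    def kill_session(self):
        pass    # would tear down the blpapi session here

    def download_tick(self, market_data_request):
        return None    # would issue an IntradayTickRequest

    def download_intraday(self, market_data_request):
        return None    # would issue an IntradayBarRequest

    def download_daily(self, market_data_request):
        return None    # would issue a HistoricalDataRequest

    def download_ref(self, market_data_request):
        return None    # would issue a ReferenceDataRequest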