def load_assets(self, br=None):
    ##### FILL IN WITH YOUR ASSET DATA
    from findatapy.util.loggermanager import LoggerManager

    logger = LoggerManager().getLogger(__name__)

    # for FX basket
    full_bkt = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

    basket_dict = {}

    for i in range(0, len(full_bkt)):
        basket_dict[full_bkt[i]] = [full_bkt[i]]

    basket_dict['FX trend'] = full_bkt

    br = self.load_parameters(br=br)

    logger.info("Loading asset data...")

    vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                      'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

    market_data_request = MarketDataRequest(
        start_date=br.start_date,           # start date
        finish_date=br.finish_date,         # finish date
        freq='daily',                       # daily data
        data_source='quandl',               # use Quandl as data source
        tickers=full_bkt,                   # ticker (Thalesians)
        fields=['close'],                   # which fields to download
        vendor_tickers=vendor_tickers,      # ticker (Quandl)
        vendor_fields=['close'],            # which Quandl fields to download
        cache_algo='cache_algo_return')     # how to return data

    asset_df = self.market.fetch_market(market_data_request)

    # if web connection fails read from CSV
    if asset_df is None:
        import datetime
        import pandas

        asset_df = pandas.read_csv(
            "d:/fxcta.csv", index_col=0, parse_dates=['Date'],
            date_parser=lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'))

    # signalling variables
    spot_df = asset_df
    spot_df2 = None     # asset_df

    return asset_df, spot_df, spot_df2, basket_dict
def __init__(self, engine=ChartConstants().chartfactory_default_engine):
    self.logger = LoggerManager().getLogger(__name__)

    self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
    self.SCALE_FACTOR = 3
    self.DEFAULT_PLOT_ENGINE = engine

    self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE)

    return
def __init__(self):
    super(MarketDataRequest, self).__init__()
    self.logger = LoggerManager().getLogger(__name__)

    self.__signal_name = None

    # output parameters for backtest (should we add returns statistics on legends,
    # write CSVs with returns etc.)
    self.__plot_start = None
    self.__calc_stats = True
    self.__write_csv = False
    self.__write_csv_pnl = False
    self.__plot_interim = False
    self.__include_benchmark = False

    self.__tech_params = TechParams()

    # default parameters for portfolio level vol adjustment
    self.__portfolio_vol_adjust = False
    self.__portfolio_vol_period_shift = 0
    self.__portfolio_vol_rebalance_freq = None
    self.__portfolio_vol_resample_freq = None
    self.__portfolio_vol_resample_type = 'mean'
    self.__portfolio_vol_target = 0.1           # 10% vol target
    self.__portfolio_vol_max_leverage = None
    self.__portfolio_vol_periods = 20
    self.__portfolio_vol_obs_in_year = 252

    # default parameters for signal level vol adjustment
    self.__signal_vol_adjust = False
    self.__signal_vol_period_shift = 0
    self.__signal_vol_rebalance_freq = None
    self.__signal_vol_resample_freq = None
    self.__signal_vol_resample_type = 'mean'
    self.__signal_vol_target = 0.1              # 10% vol target
    self.__signal_vol_max_leverage = None
    self.__signal_vol_periods = 20
    self.__signal_vol_obs_in_year = 252

    # portfolio notional size
    self.__portfolio_notional_size = None
    self.__portfolio_combination = None
    self.__portfolio_combination_weights = None

    # parameters for maximum position limits (expressed as whole portfolio)
    self.__max_net_exposure = None
    self.__max_abs_exposure = None

    self.__position_clip_rebalance_freq = None
    self.__position_clip_resample_freq = None   # by default apply max position criterion on last business day of month
    self.__position_clip_resample_type = 'mean'
    self.__position_clip_period_shift = 0

    # take profit and stop loss parameters
    self.__take_profit = None
    self.__stop_loss = None

    # should we delay the signal?
    self.__signal_delay = 0
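# Hedged usage sketch (not library source): assumes the attributes initialised above
# belong to a finmarketpy-style BacktestRequest which exposes matching public
# properties (portfolio_vol_adjust, signal_vol_adjust, signal_delay, ...), and shows
# how a strategy might override a few of the defaults. The import path is assumed.

from finmarketpy.backtest import BacktestRequest

br = BacktestRequest()

br.start_date = "01 Jan 2010"       # backtest window
br.finish_date = "01 Jan 2020"
br.spot_tc_bp = 0.5                 # transaction cost in bp

br.portfolio_vol_adjust = True      # target 10% vol at portfolio level
br.portfolio_vol_target = 0.1
br.signal_vol_adjust = True         # and at individual signal level
br.signal_vol_target = 0.1

br.signal_delay = 1                 # trade signals with a one-day lag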
def process_message(self, msg):
    constants = DataConstants()

    # Process received events
    # SLOW loop (careful, not all the fields will be returned every time
    # hence need to include the field name in the tuple)
    # perhaps try to run in parallel?
    logger = LoggerManager().getLogger(__name__)

    implementation = 'simple'

    if implementation == 'simple':
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        data = defaultdict(dict)

        # FASTER avoid calling getValue/getElement methods in blpapi, very slow,
        # better to cache variables
        for i in range(fieldData.numValues()):
            mini_field_data = fieldData.getValue(i)
            date = mini_field_data.getElement(0).getValue()

            for j in range(1, mini_field_data.numElements()):
                field_value = mini_field_data.getElement(j)

                data[(str(field_value.name()), ticker)][date] = field_value.getValue()

        # ORIGINAL repeated calling getValue/getElement much slower
        # for i in range(fieldData.numValues()):
        #     for j in range(1, fieldData.getValue(i).numElements()):
        #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
        #             = fieldData.getValue(i).getElement(j).getValue()

    elif implementation == 'py4j':
        pass

        # TODO Py4J
        # from findatapy.market.bbgloop import bbgloop
        # from py4j.java_gateway import JavaGateway
        # gateway = JavaGateway()
        # data = gateway.entry_point.parseFieldDataArray(msg)

    elif implementation == 'cython':
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        from findatapy.market.bbgloop import bbgloop

        data = bbgloop(fieldData, ticker)

    elif implementation == 'numba':
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        from findatapy.market.bbgloop_numba import bbgloop_numba

        data = bbgloop_numba(fieldData, ticker)

    # TODO cython

    data_frame = pd.DataFrame(data)

    # if obsolete ticker could return no values
    if not data_frame.empty:
        # data_frame.columns = pd.MultiIndex.from_tuples(data, names=['field', 'ticker'])
        data_frame.index = pd.to_datetime(data_frame.index)
        logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
    else:
        return None

    return data_frame
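# Illustrative sketch only (plain pandas, no blpapi): the nested dict built above maps
# (field, ticker) -> {date -> value}; passing it to pandas gives a frame whose columns
# carry those (field, ticker) pairs, mirroring the MultiIndex.from_tuples call commented
# out above. Field/ticker names and prices below are hypothetical.

from collections import defaultdict

import pandas as pd

data = defaultdict(dict)

data[('PX_LAST', 'EURUSD Curncy')]['2020-01-02'] = 1.1172
data[('PX_LAST', 'EURUSD Curncy')]['2020-01-03'] = 1.1160
data[('PX_OPEN', 'EURUSD Curncy')]['2020-01-02'] = 1.1221

data_frame = pd.DataFrame(data)
data_frame.index = pd.to_datetime(data_frame.index)

print(data_frame)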
def load_ticker(self, market_data_request):
    """Retrieves market data from external data source (in this case Bloomberg)

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains all the various parameters detailing time series start and finish, tickers etc

    Returns
    -------
    DataFrame
    """
    constants = DataConstants()

    market_data_request = MarketDataRequest(md_request=market_data_request)
    market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

    data_frame = None

    logger = LoggerManager().getLogger(__name__)
    logger.info("Request Bloomberg data")

    # Do we need daily or intraday data?
    if market_data_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']:

        # Work out the fields which need to be downloaded via Bloomberg ref request (BDP) and
        # those that can be downloaded via Historical request (BDH)
        ref_fields = []
        ref_vendor_fields = []

        # Get user defined list of BBG fields/vendor fields which need to be downloaded by BDP
        bbg_ref_fields = list(constants.bbg_ref_fields.keys())
        bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

        for i in range(0, len(market_data_request.fields)):
            if market_data_request.fields[i] in bbg_ref_fields \
                    or market_data_request_vendor.fields[i] in bbg_ref_vendor_fields:
                ref_fields.append(market_data_request.fields[i])
                ref_vendor_fields.append(market_data_request_vendor.fields[i])

        non_ref_fields = []
        non_ref_vendor_fields = []

        for i in range(0, len(market_data_request.fields)):
            if market_data_request.fields[i] not in bbg_ref_fields \
                    and market_data_request_vendor.fields[i] not in bbg_ref_vendor_fields:
                non_ref_fields.append(market_data_request.fields[i])
                non_ref_vendor_fields.append(market_data_request_vendor.fields[i])

        # For certain cases, need to use ReferenceDataRequest
        # eg. for events times/dates, last tradeable date fields (when specified)
        if len(ref_fields) > 0:

            # Careful: make sure you copy the market data request object (when threading,
            # altering that can cause concurrency issues!)
            old_fields = copy.deepcopy(market_data_request.fields)
            old_vendor_fields = copy.deepcopy(market_data_request_vendor.fields)

            # md_request = MarketDataRequest(md_request=market_data_request_copy)

            market_data_request.fields = ref_fields
            market_data_request.vendor_fields = ref_vendor_fields
            market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

            # Just select those reference fields to download via reference
            datetime_data_frame = self.get_reference_data(market_data_request_vendor, market_data_request)

            # Download all the other event or non-ref fields (uses HistoricalDataRequest to Bloomberg)
            # concatenate with date time fields
            if len(non_ref_fields) > 0:
                market_data_request.fields = non_ref_fields
                market_data_request.vendor_fields = non_ref_vendor_fields
                market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

                events_data_frame = self.get_daily_data(market_data_request, market_data_request_vendor)

                col = events_data_frame.index.name
                events_data_frame = events_data_frame.reset_index(drop=False)

                data_frame = pd.concat([events_data_frame, datetime_data_frame], axis=1)
                temp = data_frame[col]
                del data_frame[col]
                data_frame.index = temp
            else:
                data_frame = datetime_data_frame

            market_data_request.fields = copy.deepcopy(old_fields)
            market_data_request_vendor.fields = copy.deepcopy(old_vendor_fields)

        # For all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
        else:
            data_frame = self.get_daily_data(market_data_request, market_data_request_vendor)

            # if data_frame is not None:
            #     # Convert fields with release-dt to dates (special case!) and assume everything else numerical
            #     for c in data_frame.columns:
            #         try:
            #             if 'release-dt' in c:
            #                 data_frame[c] = (data_frame[c]).astype('int').astype(str).apply(
            #                     lambda x: pd.to_datetime(x, format='%Y%m%d'))
            #             else:
            #                 data_frame[c] = pd.to_numeric(data_frame[c])
            #         except:
            #             pass

    # Assume one ticker only for intraday data and use IntradayDataRequest to Bloomberg
    if market_data_request.freq in ['tick', 'intraday', 'second', 'minute', 'hourly']:
        market_data_request_vendor.tickers = market_data_request_vendor.tickers[0]

        if market_data_request.freq in ['tick', 'second']:
            data_frame = self.download_tick(market_data_request_vendor)
        else:
            data_frame = self.download_intraday(market_data_request_vendor)

        if data_frame is not None:
            if data_frame.empty:
                try:
                    logger.info("No tickers returned for: " + market_data_request_vendor.tickers)
                except:
                    pass

                return None

            cols = data_frame.columns.values

            import pytz

            try:
                data_frame = data_frame.tz_localize(pytz.utc)
            except:
                data_frame = data_frame.tz_convert(pytz.utc)

            cols = market_data_request.tickers[0] + "." + cols
            data_frame.columns = cols

    logger.info("Completed request from Bloomberg.")

    return data_frame
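# Hedged usage sketch (assumes a working blpapi installation and findatapy's standard
# entry points): a single request mixing a reference-style field ('last-tradeable-day',
# routed to a ReferenceDataRequest) with an ordinary historical field ('close', routed
# to a HistoricalDataRequest), which is exactly the split load_ticker above performs.
# The vendor ticker is only an example.

from findatapy.market import Market, MarketDataGenerator, MarketDataRequest

market = Market(market_data_generator=MarketDataGenerator())

md_request = MarketDataRequest(
    start_date="01 Jan 2020", finish_date="01 Jun 2020",
    freq='daily', data_source='bloomberg',
    tickers=['Gold'], vendor_tickers=['GCA Comdty'],
    fields=['close', 'last-tradeable-day'],
    vendor_fields=['PX_LAST', 'LAST_TRADEABLE_DT'])

df = market.fetch_market(md_request)
print(df.tail())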
def write_time_series_cache_to_disk( self, fname, data_frame, engine='hdf5_fixed', append_data=False, db_server=constants.db_server, db_port=constants.db_port, username=constants.db_username, password=constants.db_password, filter_out_matching=None, timeout=10, use_cache_compression=constants.use_cache_compression, parquet_compression=constants.parquet_compression, md_request=None, ticker=None): """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic Parmeters --------- fname : str path of file data_frame : DataFrame data frame to be written to disk engine : str 'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this 'hdf5_table' - use HDF5 table format, slower but can append to 'parquet' - use Parquet 'arctic' - use Arctic/MongoDB database 'redis' - use Redis append_data : bool False - write a fresh copy of data on disk each time True - append data to disk db_server : str Database server for arctic (default: '127.0.0.1') timeout : int Number of seconds to do timeout """ logger = LoggerManager().getLogger(__name__) if md_request is not None: fname = self.path_join( fname, md_request.create_category_key(ticker=ticker)) # default HDF5 format hdf5_format = 'fixed' if 'hdf5' in engine: hdf5_format = engine.split('_')[1] engine = 'hdf5' if (engine == 'bcolz'): # convert invalid characters to substitutes (which Bcolz can't deal with) data_frame.columns = self.find_replace_chars( data_frame.columns, _invalid_chars, _replace_chars) data_frame.columns = ['A_' + x for x in data_frame.columns] data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns') bcolzpath = self.get_bcolz_filename(fname) shutil.rmtree(bcolzpath, ignore_errors=True) zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath) elif (engine == 'redis'): fname = os.path.basename(fname).replace('.', '_') # Will fail if Redis is not installed try: r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout, socket_connect_timeout=timeout) ping = r.ping() # If Redis is alive, try pushing to it if ping: if data_frame is not None: if isinstance(data_frame, pandas.DataFrame): mem = data_frame.memory_usage(deep='deep').sum() mem_float = round( float(mem) / (1024.0 * 1024.0), 3) if mem_float < 500: # msgpack/blosc is deprecated # r.set(fname, data_frame.to_msgpack(compress='blosc')) # now uses pyarrow context = pa.default_serialization_context() ser = context.serialize(data_frame).to_buffer() if use_cache_compression: comp = pa.compress(ser, codec='lz4', asbytes=True) siz = len(ser) # siz = 3912 r.set('comp_' + str(siz) + '_' + fname, comp) else: r.set(fname, ser.to_pybytes()) logger.info("Pushed " + fname + " to Redis") else: logger.warn("Did not push " + fname + " to Redis, given size") else: logger.info("Object " + fname + " is empty, not pushed to Redis.") else: logger.warning("Didn't push " + fname + " to Redis given not running") except Exception as e: logger.warning("Couldn't push " + fname + " to Redis: " + str(e)) elif (engine == 'arctic'): from arctic import Arctic import pymongo socketTimeoutMS = 30 * 1000 fname = os.path.basename(fname).replace('.', '_') logger.info('Load Arctic/MongoDB library: ' + fname) if username is not None and password is not None: c = pymongo.MongoClient( host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port), connect=False) # , username=username, password=password) else: c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False) store = Arctic(c, 
socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS, connectTimeoutMS=socketTimeoutMS) database = None try: database = store[fname] except: pass if database is None: store.initialize_library(fname, audit=False) logger.info("Created MongoDB library: " + fname) else: logger.info("Got MongoDB library: " + fname) # Access the library library = store[fname] if ('intraday' in fname): data_frame = data_frame.astype('float32') if filter_out_matching is not None: cols = data_frame.columns new_cols = [] for col in cols: if filter_out_matching not in col: new_cols.append(col) data_frame = data_frame[new_cols] # Problems with Arctic when writing timezone to disk sometimes, so strip data_frame = data_frame.copy().tz_localize(None) try: # Can duplicate values if we have existing dates if append_data: library.append(fname, data_frame) else: library.write(fname, data_frame) c.close() logger.info("Written MongoDB library: " + fname) except Exception as e: logger.warning("Couldn't write MongoDB library: " + fname + " " + str(e)) elif (engine == 'hdf5'): h5_filename = self.get_h5_filename(fname) # append data only works for HDF5 stored as tables (but this is much slower than fixed format) # removes duplicated entries at the end if append_data: store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="zlib", complevel=9) if ('intraday' in fname): data_frame = data_frame.astype('float32') # get last row which matches and remove everything after that (because append # function doesn't check for duplicated rows nrows = len(store['data'].index) last_point = data_frame.index[-1] i = nrows - 1 while (i > 0): read_index = store.select('data', start=i, stop=nrows).index[0] if (read_index <= last_point): break i = i - 1 # remove rows at the end, which are duplicates of the incoming time series store.remove(key='data', start=i, stop=nrows) store.put(key='data', value=data_frame, format=hdf5_format, append=True) store.close() else: h5_filename_temp = self.get_h5_filename(fname + ".temp") # delete the old copy try: os.remove(h5_filename_temp) except: pass store = pandas.HDFStore(h5_filename_temp, complib="zlib", complevel=9) if ('intraday' in fname): data_frame = data_frame.astype('float32') store.put(key='data', value=data_frame, format=hdf5_format) store.close() # delete the old copy try: os.remove(h5_filename) except: pass # once written to disk rename os.rename(h5_filename_temp, h5_filename) logger.info("Written HDF5: " + fname) elif (engine == 'parquet'): if '.parquet' not in fname: if fname[-5:] != '.gzip': fname = fname + '.parquet' self.to_parquet(data_frame, fname, aws_region=constants.aws_region, parquet_compression=parquet_compression) # data_frame.to_parquet(fname, compression=parquet_compression) logger.info("Written Parquet: " + fname) elif engine == 'csv': if '.csv' not in fname: fname = fname + '.csv' data_frame.to_csv(fname) logger.info("Written CSV: " + fname)
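# Hedged usage sketch for write_time_series_cache_to_disk above: round-tripping a small
# frame through the HDF5 fixed-format branch and reading it back. The import path is
# assumed from findatapy's usual layout; the file name is purely illustrative.

import pandas as pd

from findatapy.market.ioengine import IOEngine

io = IOEngine()

df = pd.DataFrame({'EURUSD.close': [1.10, 1.11, 1.09]},
                  index=pd.date_range('2020-01-01', periods=3))

# '.h5' is appended automatically by get_h5_filename
io.write_time_series_cache_to_disk('eurusd_cache', df, engine='hdf5_fixed')

df_back = io.read_time_series_cache_from_disk('eurusd_cache', engine='hdf5')
print(df_back)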
class IOEngine(object): def __init__(self): self.logger = LoggerManager().getLogger(__name__) ### functions to handle Excel on disk def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False): """ write_time_series_to_excel - writes Pandas data frame to disk in Excel format Parameters ---------- fname : str Excel filename to be written to sheet : str sheet in excel data_frame : DataFrame data frame to be written create_new : boolean to create a new Excel file """ if (create_new): writer = pandas.ExcelWriter(fname, engine='xlsxwriter') else: if os.path.isfile(fname): book = load_workbook(fname) writer = pandas.ExcelWriter(fname, engine='xlsxwriter') writer.book = book writer.sheets = dict((ws.title, ws) for ws in book.worksheets) else: writer = pandas.ExcelWriter(fname, engine='xlsxwriter') data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter') writer.save() writer.close() def write_time_series_to_excel_writer(self, writer, sheet, data_frame): """ write_time_series_to_excel_writer - writes Pandas data frame to disk in Excel format for a writer Parameters ---------- writer : ExcelWriter File handle to use for writing Excel file to disk sheet : str sheet in excel data_frame : DataFrame data frame to be written """ data_frame.to_excel(writer, sheet, engine='xlsxwriter') def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None, postfix='.close', intraday_tz='UTC'): """ read_excel_data_frame - Reads Excel from disk into DataFrame Parameters ---------- f_name : str Excel file path to read freq : str Frequency of data to read (intraday/daily etc) cutoff : DateTime (optional) end date to read up to dateparse : str (optional) date parser to use postfix : str (optional) postfix to add to each columns intraday_tz : str timezone of file if uses intraday data Returns ------- DataFrame """ return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse, postfix=postfix, intraday_tz=intraday_tz, excel_sheet=excel_sheet) def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1'): if 'hdf5' in engine: engine = 'hdf5' if (engine == 'bcolz'): # convert invalid characters to substitutes (which Bcolz can't deal with) pass elif (engine == 'arctic'): from arctic import Arctic import pymongo socketTimeoutMS = 10 * 1000 fname = os.path.basename(fname).replace('.', '_') self.logger.info('Load MongoDB library: ' + fname) c = pymongo.MongoClient(db_server, connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS) store.delete_library(fname) c.close() self.logger.info("Deleted MongoDB library: " + fname) elif (engine == 'hdf5'): h5_filename = self.get_h5_filename(fname) # delete the old copy try: os.remove(h5_filename) except: pass ### functions to handle HDF5 on disk def write_time_series_cache_to_disk(self, fname, data_frame, engine='hdf5_fixed', append_data=False, db_server='127.0.0.1', filter_out_matching=None): """ write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic Parmeters --------- fname : str path of file data_frame : DataFrame data frame to be written to disk """ # default HDF5 format hdf5_format = 'fixed' if 'hdf5' in engine: hdf5_format = engine.split('_')[1] engine = 'hdf5' if (engine == 'bcolz'): # convert invalid characters to substitutes (which Bcolz can't deal with) data_frame.columns = self.find_replace_chars( data_frame.columns, _invalid_chars, _replace_chars) data_frame.columns 
= ['A_' + x for x in data_frame.columns] data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns') bcolzpath = self.get_bcolz_filename(fname) shutil.rmtree(bcolzpath, ignore_errors=True) zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath) elif (engine == 'arctic'): from arctic import Arctic import pymongo socketTimeoutMS = 30 * 1000 fname = os.path.basename(fname).replace('.', '_') self.logger.info('Load MongoDB library: ' + fname) c = pymongo.MongoClient(db_server, connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS) database = None try: database = store[fname] except: pass if database is None: store.initialize_library(fname, audit=False) self.logger.info("Created MongoDB library: " + fname) else: self.logger.info("Got MongoDB library: " + fname) # Access the library library = store[fname] if ('intraday' in fname): data_frame = data_frame.astype('float32') if filter_out_matching is not None: cols = data_frame.columns new_cols = [] for col in cols: if filter_out_matching not in col: new_cols.append(col) data_frame = data_frame[new_cols] # can duplicate values if we have existing dates if append_data: library.append(fname, data_frame) else: library.write(fname, data_frame) c.close() self.logger.info("Written MongoDB library: " + fname) elif (engine == 'hdf5'): h5_filename = self.get_h5_filename(fname) # append data only works for HDF5 stored as tables (but this is much slower than fixed format) # removes duplicated entries at the end if append_data: store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="blosc", complevel=9) if ('intraday' in fname): data_frame = data_frame.astype('float32') # get last row which matches and remove everything after that (because append # function doesn't check for duplicated rows nrows = len(store['data'].index) last_point = data_frame.index[-1] i = nrows - 1 while (i > 0): read_index = store.select('data', start=i, stop=nrows).index[0] if (read_index <= last_point): break i = i - 1 # remove rows at the end, which are duplicates of the incoming time series store.remove(key='data', start=i, stop=nrows) store.put(key='data', value=data_frame, format=hdf5_format, append=True) store.close() else: h5_filename_temp = self.get_h5_filename(fname + ".temp") # delete the old copy try: os.remove(h5_filename_temp) except: pass store = pandas.HDFStore(h5_filename_temp, format=hdf5_format, complib="blosc", complevel=9) if ('intraday' in fname): data_frame = data_frame.astype('float32') store.put(key='data', value=data_frame, format=hdf5_format) store.close() # delete the old copy try: os.remove(h5_filename) except: pass # once written to disk rename os.rename(h5_filename_temp, h5_filename) def get_h5_filename(self, fname): """ get_h5_filename - Strips h5 off filename returning first portion of filename Parameters ---------- fname : str h5 filename to strip Returns ------- str """ if fname[-3:] == '.h5': return fname return fname + ".h5" def get_bcolz_filename(self, fname): """ get_bcolz_filename - Strips h5 off filename returning first portion of filename Parameters ---------- fname : str h5 filename to strip Returns ------- str """ if fname[-6:] == '.bcolz': return fname return fname + ".bcolz" def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None): """ write_r_compatible_hdf_dataframe - Write a DataFrame to disk in as an R compatible HDF5 file Parameters ---------- data_frame : DataFrame data frame to be written fname : str file path to be written fields : 
list(str) columns to be written """ fname_r = self.get_h5_filename(fname) self.logger.info("About to dump R binary HDF5 - " + fname_r) data_frame32 = data_frame.astype('float32') if fields is None: fields = data_frame32.columns.values # decompose date/time into individual fields (easier to pick up in R) data_frame32['Year'] = data_frame.index.year data_frame32['Month'] = data_frame.index.month data_frame32['Day'] = data_frame.index.day data_frame32['Hour'] = data_frame.index.hour data_frame32['Minute'] = data_frame.index.minute data_frame32['Second'] = data_frame.index.second data_frame32['Millisecond'] = data_frame.index.microsecond / 1000 data_frame32 = data_frame32[[ 'Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond' ] + fields] cols = data_frame32.columns store_export = pandas.HDFStore(fname_r) store_export.put('df_for_r', data_frame32, data_columns=cols) store_export.close() def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None, db_server='127.0.0.1'): """ read_time_series_cache_from_disk - Reads time series cache from disk in either HDF5 or bcolz Parameters ---------- fname : str file to be read from Returns ------- DataFrame """ if (engine == 'bcolz'): try: name = self.get_bcolz_filename(fname) zlens = bcolz.open(rootdir=name) data_frame = zlens.todataframe() data_frame.index = pandas.DatetimeIndex(data_frame['DTS_']) data_frame.index.name = 'Date' del data_frame['DTS_'] # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas data_frame.columns = self.find_replace_chars( data_frame.columns, _replace_chars, _invalid_chars) data_frame.columns = [x[2:] for x in data_frame.columns] return data_frame except: return None elif (engine == 'arctic'): socketTimeoutMS = 2 * 1000 import pymongo from arctic import Arctic fname = os.path.basename(fname).replace('.', '_') self.logger.info('Load MongoDB library: ' + fname) c = pymongo.MongoClient(db_server, connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS) # Access the library library = store[fname] if start_date is None and finish_date is None: item = library.read(fname) else: from arctic.date import DateRange item = library.read(fname, date_range=DateRange( start_date, finish_date)) c.close() self.logger.info('Read ' + fname) return item.data elif os.path.isfile(self.get_h5_filename(fname)): store = pandas.HDFStore(self.get_h5_filename(fname)) data_frame = store.select("data") if ('intraday' in fname): data_frame = data_frame.astype('float32') store.close() return data_frame return None ### functions for CSV reading and writing def write_time_series_to_csv(self, csv_path, data_frame): data_frame.to_csv(csv_path) def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None, postfix='.close', intraday_tz='UTC', excel_sheet=None): """ read_csv_data_frame - Reads CSV/Excel from disk into DataFrame Parameters ---------- f_name : str CSV/Excel file path to read freq : str Frequency of data to read (intraday/daily etc) cutoff : DateTime (optional) end date to read up to dateparse : str (optional) date parser to use postfix : str (optional) postfix to add to each columns intraday_tz : str (optional) timezone of file if uses intraday data excel_sheet : str (optional) Excel sheet to be read Returns ------- DataFrame """ if (freq == 'intraday'): if dateparse is None: dateparse = lambda x: datetime.datetime(*map( int, [x[6:10], x[3:5], x[0:2], x[11:13], x[14:16], x[17:19]])) elif dateparse 
is 'dukascopy': dateparse = lambda x: datetime.datetime(*map( int, [x[0:4], x[5:7], x[8:10], x[11:13], x[14:16], x[17:19]])) elif dateparse is 'c': # use C library for parsing dates, several hundred times quicker # requires compilation of library to install import ciso8601 dateparse = lambda x: ciso8601.parse_datetime(x) if excel_sheet is None: data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True, date_parser=dateparse) else: data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA']) data_frame = data_frame.astype('float32') data_frame.index.names = ['Date'] old_cols = data_frame.columns new_cols = [] # add '.close' to each column name for col in old_cols: new_cols.append(col + postfix) data_frame.columns = new_cols else: # daily data if 'events' in f_name: data_frame = pandas.read_csv(f_name) # very slow conversion data_frame = data_frame.convert_objects(convert_dates='coerce') else: if excel_sheet is None: try: data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"], date_parser=dateparse) except: data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["Date"], date_parser=dateparse) else: data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA']) # convert Date to Python datetime # datetime data_frame['Date1'] = data_frame.index # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S') # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]), # int(x[12:13]), int(x[15:16]), int(x[18:19]))) # data_frame.index = data_frame['Date1'] # data_frame.drop('Date1') # slower method: data_frame.index = pandas.to_datetime(data_frame.index) if (freq == 'intraday'): # assume time series are already in UTC and assign this (can specify other time zones) data_frame = data_frame.tz_localize(intraday_tz) # end cutoff date if cutoff is not None: if (isinstance(cutoff, str)): cutoff = parse(cutoff) data_frame = data_frame.loc[data_frame.index < cutoff] return data_frame def find_replace_chars(self, array, to_find, replace_with): for i in range(0, len(to_find)): array = [x.replace(to_find[i], replace_with[i]) for x in array] return array def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None): """ convert_csv_data_frame - Converts CSV file to HDF5 file Parameters ---------- f_name : str File name to be read category : str data category of file (used in HDF5 filename) freq : str intraday/daily frequency (used in HDF5 filename) cutoff : DateTime (optional) filter dates up to here dateparse : str date parser to use """ self.logger.info("About to read... " + f_name) data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse) category_f_name = self.create_cache_file_name(category) self.write_time_series_cache_to_disk(category_f_name, data_frame) def clean_csv_file(self, f_name): """ clean_csv_file - Cleans up CSV file (removing empty characters) before writing back to disk Parameters ---------- f_name : str CSV file to be cleaned """ with codecs.open(f_name, 'rb', 'utf-8') as myfile: data = myfile.read() # clean file first if dirty if data.count('\x00'): self.logger.info('Cleaning CSV...') with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of: of.write(data.replace('\x00', '')) shutil.move(f_name + '.tmp', f_name) def create_cache_file_name(self, filename): return DataConstants().folder_time_series_data + "/" + filename
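# Hedged usage sketch for read_csv_data_frame above: an intraday CSV whose first column
# holds 'dd/mm/yyyy hh:mm:ss' timestamps comes back as float32 columns with a '.close'
# postfix and a UTC-localised index. File name and import path are illustrative.

from findatapy.market.ioengine import IOEngine

io = IOEngine()

df = io.read_csv_data_frame('eurusd_1min.csv', 'intraday',
                            cutoff='01 Jun 2020', postfix='.close',
                            intraday_tz='UTC')

print(df.columns)      # e.g. ['EURUSD.close', ...]
print(df.index.tz)     # UTC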
class Filter(object): _time_series_cache = {} # shared across all instances of object! def __init__(self): # self.config = ConfigManager() self.logger = LoggerManager().getLogger(__name__) return def filter_time_series(self, market_data_request, data_frame, pad_columns=False): """ filter_time_series - Filters a time series given a set of criteria (like start/finish date and tickers) Parameters ---------- market_data_request : MarketDataRequest defining time series filtering data_frame : DataFrame time series to be filtered pad_columns : boolean true, non-existant columns with nan Returns ------- DataFrame """ start_date = market_data_request.start_date finish_date = market_data_request.finish_date data_frame = self.filter_time_series_by_date(start_date, finish_date, data_frame) # filter by ticker.field combinations requested columns = self.create_tickers_fields_list(market_data_request) if (pad_columns): data_frame = self.pad_time_series_columns(columns, data_frame) else: data_frame = self.filter_time_series_by_columns( columns, data_frame) return data_frame def create_calendar_bus_days(self, start_date, end_date, cal='FX'): """ create_calendar_bus_days - Creates a calendar of business days) Parameters ---------- start_date : DateTime start date of calendar end_date : DataFrame finish date of calendar cal : str business calendar to use Returns ------- list """ hols = self.get_holidays(start_date, end_date, cal) index = pandas.bdate_range(start=start_date, end=end_date, freq='D') return [x for x in index if x not in hols] def get_holidays(self, start_date, end_date, cal='FX'): """ get_holidays - Gets the holidays for a given calendar Parameters ---------- start_date : DateTime start date of calendar end_date : DataFrame finish date of calendar cal : str business calendar to use Returns ------- list """ # TODO use Pandas CustomBusinessDays to get more calendars holidays_list = [] if cal == 'FX': # filter for Christmas & New Year's Day for i in range(1970, 2020): holidays_list.append(str(i) + "-12-25") holidays_list.append(str(i) + "-01-01") if cal == 'WEEKDAY': bday = CustomBusinessDay(weekmask='Sat Sun') holidays_list = pandas.date_range(start_date, end_date, freq=bday) # holidays_list = pandas.to_datetime(holidays_list).order() holidays_list = pandas.to_datetime(holidays_list).sort_values() # floor start date start = np.datetime64(start_date) - np.timedelta64(1, 'D') # ceiling end date end = np.datetime64(end_date) + np.timedelta64(1, 'D') holidays_list = [x for x in holidays_list if x >= start and x <= end] return pandas.to_datetime(holidays_list) def filter_time_series_by_holidays(self, data_frame, cal='FX'): """ filter_time_series_by_holidays - Removes holidays from a given time series Parameters ---------- data_frame : DataFrame data frame to be filtered cal : str business calendar to use Returns ------- DataFrame """ # optimal case for weekdays: remove Saturday and Sunday if (cal == 'WEEKDAY'): return data_frame.ix[data_frame.index.dayofweek <= 4] # select only those holidays in the sample holidays_start = self.get_holidays(data_frame.index[0], data_frame.index[-1], cal) if (holidays_start.size == 0): return data_frame holidays_end = holidays_start + np.timedelta64(1, 'D') # floored_dates = data_frame.index.normalize() # # filter_by_index_start = floored_dates.searchsorted(holidays_start) # filter_by_index_end = floored_dates.searchsorted(holidays_end) # # indices_to_keep = [] # # if filter_by_index_end[0] == 0: # counter = filter_by_index_end[0] + 1 # start_index = 1 # else: # 
counter = 0 # start_index = 0 # # for i in range(start_index, len(holidays_start)): # indices = list(range(counter, filter_by_index_start[i] - 1)) # indices_to_keep = indices_to_keep + indices # # counter = filter_by_index_end[i] + 1 # # indices = list(range(counter, len(floored_dates))) # indices_to_keep = indices_to_keep + indices # # data_frame_filtered = data_frame.ix[indices_to_keep] data_frame_left = data_frame data_frame_filtered = [] for i in range(0, len(holidays_start)): data_frame_temp = data_frame_left.ix[ data_frame_left.index < holidays_start[i]] data_frame_left = data_frame_left.ix[ data_frame_left.index >= holidays_end[i]] data_frame_filtered.append(data_frame_temp) data_frame_filtered.append(data_frame_left) return pandas.concat(data_frame_filtered) def filter_time_series_by_date(self, start_date, finish_date, data_frame): """ filter_time_series_by_date - Filter time series by start/finish dates Parameters ---------- start_date : DateTime start date of calendar finish_date : DataTime finish date of calendar data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ offset = 0 # inclusive return self.filter_time_series_by_date_offset(start_date, finish_date, data_frame, offset) def filter_time_series_by_days(self, days, data_frame): """ filter_time_series_by_date - Filter time series by start/finish dates Parameters ---------- start_date : DateTime start date of calendar finish_date : DataTime finish date of calendar data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ offset = 0 # inclusive finish_date = datetime.datetime.utcnow() start_date = finish_date - timedelta(days=days) return self.filter_time_series_by_date_offset(start_date, finish_date, data_frame, offset) def filter_time_series_by_date_exc(self, start_date, finish_date, data_frame): """ filter_time_series_by_date_exc - Filter time series by start/finish dates (exclude start & finish dates) Parameters ---------- start_date : DateTime start date of calendar finish_date : DataTime finish date of calendar data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ offset = 1 # exclusive of start finish date return self.filter_time_series_by_date_offset(start_date, finish_date, data_frame, offset) # try: # # filter by dates for intraday data # if(start_date is not None): # data_frame = data_frame.loc[start_date <= data_frame.index] # # if(finish_date is not None): # # filter by start_date and finish_date # data_frame = data_frame.loc[data_frame.index <= finish_date] # except: # # filter by dates for daily data # if(start_date is not None): # data_frame = data_frame.loc[start_date.date() <= data_frame.index] # # if(finish_date is not None): # # filter by start_date and finish_date # data_frame = data_frame.loc[data_frame.index <= finish_date.date()] # # return data_frame def filter_time_series_by_date_offset(self, start_date, finish_date, data_frame, offset): """ filter_time_series_by_date_offset - Filter time series by start/finish dates (and an offset) Parameters ---------- start_date : DateTime start date of calendar finish_date : DataTime finish date of calendar data_frame : DataFrame data frame to be filtered offset : int offset to be applied Returns ------- DataFrame """ try: data_frame = self.filter_time_series_aux(start_date, finish_date, data_frame, offset) except: # start_date = start_date.date() # finish_date = finish_date.date() # if isinstance(start_date, str): # # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M' # try: # start_date = 
datetime.datetime.strptime(start_date, '%b %d %Y %H:%M') # except: # i = 0 # # if isinstance(finish_date, str): # # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M' # try: # finish_date = datetime.datetime.strptime(finish_date, '%b %d %Y %H:%M') # except: # i = 0 try: start_date = start_date.date() except: pass try: finish_date = finish_date.date() except: pass # if we have dates stored as opposed to TimeStamps (ie. daily data), we use a simple (slower) method # for filtering daily data if (start_date is not None): data_frame = data_frame.loc[start_date < data_frame.index] if (finish_date is not None): # filter by start_date and finish_date data_frame = data_frame.loc[data_frame.index < finish_date] return data_frame def filter_time_series_aux(self, start_date, finish_date, data_frame, offset): """ filter_time_series_aux - Filter time series by start/finish dates (and an offset) Parameters ---------- start_date : DateTime start date of calendar finish_date : DataTime finish date of calendar data_frame : DataFrame data frame to be filtered offset : int offset to be applied Returns ------- DataFrame """ start_index = 0 finish_index = len(data_frame.index) - offset # filter by dates for intraday data if (start_date is not None): start_index = data_frame.index.searchsorted(start_date) if (0 <= start_index + offset < len(data_frame.index)): start_index = start_index + offset # data_frame = data_frame.ix[start_date < data_frame.index] if (finish_date is not None): finish_index = data_frame.index.searchsorted(finish_date) if (0 <= finish_index - offset < len(data_frame.index)): finish_index = finish_index - offset # data_frame = data_frame[data_frame.index < finish_date] return data_frame.ix[start_index:finish_index] def filter_time_series_by_time_of_day(self, hour, minute, data_frame, in_tz=None, out_tz=None): """ filter_time_series_by_time_of_day - Filter time series by time of day Parameters ---------- hour : int hour of day minute : int minute of day data_frame : DataFrame data frame to be filtered in_tz : str (optional) time zone of input data frame out_tz : str (optional) time zone of output data frame Returns ------- DataFrame """ if out_tz is not None: if in_tz is not None: data_frame = data_frame.tz_localize(pytz.timezone(in_tz)) data_frame = data_frame.tz_convert(pytz.timezone(out_tz)) # change internal representation of time data_frame.index = pandas.DatetimeIndex(data_frame.index.values) data_frame = data_frame[data_frame.index.minute == minute] data_frame = data_frame[data_frame.index.hour == hour] return data_frame def filter_time_series_by_minute_of_hour(self, minute, data_frame, in_tz=None, out_tz=None): """ filter_time_series_by_minute_of_hour - Filter time series by minute of hour Parameters ---------- minute : int minute of hour data_frame : DataFrame data frame to be filtered in_tz : str (optional) time zone of input data frame out_tz : str (optional) time zone of output data frame Returns ------- DataFrame """ if out_tz is not None: if in_tz is not None: data_frame = data_frame.tz_localize(pytz.timezone(in_tz)) data_frame = data_frame.tz_convert(pytz.timezone(out_tz)) # change internal representation of time data_frame.index = pandas.DatetimeIndex(data_frame.index.values) data_frame = data_frame[data_frame.index.minute == minute] return data_frame def filter_time_series_between_hours(self, start_hour, finish_hour, data_frame): """ filter_time_series_between_hours - Filter time series between hours of the day Parameters ---------- start_hour : int start of hour filter 
finish_hour : int finish of hour filter data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ data_frame = data_frame[data_frame.index.hour <= finish_hour] data_frame = data_frame[data_frame.index.hour >= start_hour] return data_frame def filter_time_series_by_columns(self, columns, data_frame): """ filter_time_series_by_columns - Filter time series by certain columns Parameters ---------- columns : list(str) start of hour filter data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ return data_frame[columns] def pad_time_series_columns(self, columns, data_frame): """ pad_time_series - Selects time series from a dataframe and if necessary creates empty columns Parameters ---------- columns : str columns to be included with this keyword data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ old_columns = data_frame.columns common_columns = [val for val in columns if val in old_columns] uncommon_columns = [val for val in columns if val not in old_columns] data_frame = data_frame[common_columns] if uncommon_columns != []: self.logger.info("Padding missing columns " + str(uncommon_columns)) for x in uncommon_columns: data_frame.loc[:, x] = np.nan return data_frame def filter_time_series_by_excluded_keyword(self, keyword, data_frame): """ filter_time_series_by_excluded_keyword - Filter time series to exclude columns which contain keyword Parameters ---------- keyword : str columns to be excluded with this keyword data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ columns = [elem for elem in data_frame.columns if keyword not in elem] return self.filter_time_series_by_columns(columns, data_frame) def filter_time_series_by_included_keyword(self, keyword, data_frame): """ filter_time_series_by_included_keyword - Filter time series to include columns which contain keyword Parameters ---------- keyword : str columns to be included with this keyword data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ columns = [elem for elem in data_frame.columns if keyword in elem] return self.filter_time_series_by_columns(columns, data_frame) def filter_time_series_by_minute_freq(self, freq, data_frame): """ filter_time_series_by_minute_freq - Filter time series where minutes correspond to certain minute filter Parameters ---------- freq : int minute frequency to be filtered data_frame : DataFrame data frame to be filtered Returns ------- DataFrame """ return data_frame.loc[data_frame.index.minute % freq == 0] def create_tickers_fields_list(self, market_data_request): """ create_ticker_field_list - Creates a list of tickers concatenated with fields from a MarketDataRequest Parameters ---------- market_data_request : MarketDataRequest request to be expanded Returns ------- list(str) """ tickers = market_data_request.tickers fields = market_data_request.fields if isinstance(tickers, str): tickers = [tickers] if isinstance(fields, str): fields = [fields] tickers_fields_list = [] # create ticker.field combination for series we wish to return for f in fields: for t in tickers: tickers_fields_list.append(t + '.' 
+ f) return tickers_fields_list def resample_time_series(self, data_frame, freq): return data_frame.asfreq(freq, method='pad') def resample_time_series_frequency(self, data_frame, data_resample_freq, data_resample_type='mean', fill_empties=False): # should we take the mean, first, last in our resample if data_resample_type == 'mean': data_frame_r = data_frame.resample(data_resample_freq).mean() elif data_resample_type == 'first': data_frame_r = data_frame.resample(data_resample_freq).first() elif data_resample_type == 'last': data_frame_r = data_frame.resample(data_resample_freq).last() else: # TODO implement other types return if fill_empties == True: data_frame, data_frame_r = data_frame.align(data_frame_r, join='left', axis=0) data_frame_r = data_frame_r.fillna(method='ffill') return data_frame_r def make_FX_1_min_working_days(self, data_frame): data_frame = data_frame.resample('1min').mean() data_frame = self.filter_time_series_by_holidays(data_frame, 'FX') data_frame = data_frame.fillna(method='ffill') data_frame = self.remove_out_FX_out_of_hours(data_frame) return data_frame def remove_out_FX_out_of_hours(self, data_frame): """ remove_out_FX_out_of_hours - Filtered a time series for FX hours (ie. excludes 22h GMT Fri - 19h GMT Sun) Parameters ---------- data_frame : DataFrame data frame with FX prices Returns ------- list(str) """ # assume data_frame is in GMT time # remove Fri after 22:00 GMT # remove Sat # remove Sun before 19:00 GMT # Monday = 0, ..., Sunday = 6 data_frame = data_frame.ix[~((data_frame.index.dayofweek == 4) & (data_frame.index.hour > 22))] data_frame = data_frame.ix[~((data_frame.index.dayofweek == 5))] data_frame = data_frame.ix[~((data_frame.index.dayofweek == 6) & (data_frame.index.hour < 19))] return data_frame
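# Hedged usage sketch for the Filter above (synthetic data, purely illustrative; the
# import path is assumed from findatapy's layout): keep only the EURUSD columns, then
# pick out the 10:00 London snapshot each day from a minute-frequency series.

import numpy as np
import pandas as pd

from findatapy.timeseries import Filter

filt = Filter()

idx = pd.date_range('2020-03-02', '2020-03-06 23:59', freq='1min')   # assumed to be UTC
df = pd.DataFrame({'EURUSD.close': np.random.normal(1.11, 0.001, len(idx)),
                   'GBPUSD.close': np.random.normal(1.28, 0.001, len(idx))}, index=idx)

df_eur = filt.filter_time_series_by_included_keyword('EURUSD', df)
snap = filt.filter_time_series_by_time_of_day(10, 0, df_eur,
                                              in_tz='UTC', out_tz='Europe/London')
print(snap.head())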
def __init__(self):
    self.logger = LoggerManager().getLogger(__name__)

    return
class BBGLowLevelDaily(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    def combine_slices(self, data_frame_cols, data_frame_slice):
        # data
        try:
            if data_frame_slice.columns.get_level_values(1).values[0] not in data_frame_cols:
                # return data_frame.join(data_frame_slice, how="outer")
                return data_frame_slice
        except Exception as e:
            self.logger.warn('Data slice empty ' + str(e))

            return None

        return None

    # populate options for Bloomberg request for asset daily request
    def fill_options(self, market_data_request):
        options = OptionsBBG()

        options.security = market_data_request.tickers
        options.startDateTime = market_data_request.start_date
        options.endDateTime = market_data_request.finish_date
        options.fields = market_data_request.fields
        options.overrides = market_data_request.overrides

        return options

    def process_message(self, msg):
        # Process received events
        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        # perhaps try to run in parallel?
        implementation = 'simple'

        if implementation == 'simple':
            ticker = msg.getElement('securityData').getElement('security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            data = defaultdict(dict)

            # FASTER avoid calling getValue/getElement methods in blpapi, very slow,
            # better to cache variables
            for i in range(fieldData.numValues()):
                mini_field_data = fieldData.getValue(i)
                date = mini_field_data.getElement(0).getValue()

                for j in range(1, mini_field_data.numElements()):
                    field_value = mini_field_data.getElement(j)

                    data[(str(field_value.name()), ticker)][date] = field_value.getValue()

            # ORIGINAL repeated calling getValue/getElement much slower
            # for i in range(fieldData.numValues()):
            #     for j in range(1, fieldData.getValue(i).numElements()):
            #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
            #             = fieldData.getValue(i).getElement(j).getValue()

        elif implementation == 'py4j':
            pass

            # TODO Py4J
            # from findatapy.market.bbgloop import bbgloop
            # from py4j.java_gateway import JavaGateway
            # gateway = JavaGateway()
            # data = gateway.entry_point.parseFieldDataArray(msg)

        elif implementation == 'cython':
            ticker = msg.getElement('securityData').getElement('security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop import bbgloop

            data = bbgloop(fieldData, ticker)

        elif implementation == 'numba':
            ticker = msg.getElement('securityData').getElement('security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop_numba import bbgloop_numba

            data = bbgloop_numba(fieldData, ticker)

        # TODO cython

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if not data_frame.empty:
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue, options, cid):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
def __init__(self):
    super(DataVendorBBG, self).__init__()

    self.logger = LoggerManager().getLogger(__name__)
class DataVendorBBG(DataVendor): """Abstract class for download of Bloomberg daily, intraday data and reference data. Implemented by: DataVendorBBGOpen - Adapted version of new Bloomberg Open API for Python which is recommended. Note that this requires compilation, via installed C++ compiler. For Python 3.5, this is Microsoft Visual Studio 2015. Or it is easier to install blpapi via conda Note: no longer supports COM API, which is slower and only 32 bit """ # these fields are BDS style fields to be downloaded using Bloomberg's Reference Data interface list_of_ref_fields = [ 'release-date-time-full', 'last-tradeable-day', 'futures-chain-tickers', 'futures-chain-last-trade-dates', 'first-notice-date', 'first-tradeable-day', 'cal-non-settle-dates' ] list_of_ref_vendor_fields = [ 'ECO_FUTURE_RELEASE_DATE_LIST', 'LAST_TRADEABLE_DT', 'FUT_CHAIN', 'FUT_CHAIN_LAST_TRADE_DATES', 'FUT_NOTICE_FIRST', 'FUT_FIRST_TRADE_DT', 'CALENDAR_NON_SETTLEMENT_DATES' ] def __init__(self): super(DataVendorBBG, self).__init__() self.logger = LoggerManager().getLogger(__name__) # implement method in abstract superclass def load_ticker(self, market_data_request): """Retrieves market data from external data source (in this case Bloomberg) Parameters ---------- market_data_request : MarketDataRequest contains all the various parameters detailing time series start and finish, tickers etc Returns ------- DataFrame """ market_data_request = MarketDataRequest(md_request=market_data_request) market_data_request_vendor = self.construct_vendor_market_data_request( market_data_request) data_frame = None self.logger.info("Request Bloomberg data") # do we need daily or intraday data? if (market_data_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']): # work out the fields which need to be downloaded via Bloomberg ref request (BDP) and # those that can be downloaded via Historical request (BDH) ref_fields = [] ref_vendor_fields = [] for i in range(0, len(market_data_request.fields)): if market_data_request.fields[i] in self.list_of_ref_fields \ or market_data_request_vendor.fields[i] in self.list_of_ref_vendor_fields: ref_fields.append(market_data_request.fields[i]) ref_vendor_fields.append( market_data_request_vendor.fields[i]) non_ref_fields = [] non_ref_vendor_fields = [] for i in range(0, len(market_data_request.fields)): if market_data_request.fields[i] not in self.list_of_ref_fields \ and market_data_request_vendor.fields[i] not in self.list_of_ref_vendor_fields: non_ref_fields.append(market_data_request.fields[i]) non_ref_vendor_fields.append( market_data_request_vendor.fields[i]) # for certain cases, need to use ReferenceDataRequest # eg. for events times/dates, last tradeable date fields (when specified) if len(ref_fields) > 0: # careful: make sure you copy the market data request object (when threading, altering that can # cause concurrency issues!) 
old_fields = copy.deepcopy(market_data_request.fields) old_vendor_fields = copy.deepcopy( market_data_request_vendor.fields) # market_data_request = MarketDataRequest(md_request=market_data_request_copy) market_data_request.fields = ref_fields market_data_request.vendor_fields = ref_vendor_fields market_data_request_vendor = self.construct_vendor_market_data_request( market_data_request) # just select those reference fields to download via reference datetime_data_frame = self.get_reference_data( market_data_request_vendor, market_data_request) # download all the other event or non-ref fields (uses HistoricalDataRequest to Bloomberg) # concatenate with date time fields if len(non_ref_fields) > 0: market_data_request.fields = non_ref_fields market_data_request.vendor_fields = non_ref_vendor_fields market_data_request_vendor = self.construct_vendor_market_data_request( market_data_request) events_data_frame = self.get_daily_data( market_data_request, market_data_request_vendor) col = events_data_frame.index.name events_data_frame = events_data_frame.reset_index( drop=False) data_frame = pandas.concat( [events_data_frame, datetime_data_frame], axis=1) temp = data_frame[col] del data_frame[col] data_frame.index = temp else: data_frame = datetime_data_frame market_data_request.fields = copy.deepcopy(old_fields) market_data_request_vendor.fields = copy.deepcopy( old_vendor_fields) # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg else: data_frame = self.get_daily_data(market_data_request, market_data_request_vendor) try: # convert fields with release-dt to dates (special case!) for c in data_frame.columns: if 'release-dt' in c: data_frame[c] = ( data_frame[c]).astype('int').astype(str).apply( lambda x: pandas.to_datetime( x, format='%Y%m%d')) except: pass # assume one ticker only for intraday data and use IntradayDataRequest to Bloomberg if (market_data_request.freq in ['tick', 'intraday', 'second', 'minute', 'hourly']): market_data_request_vendor.tickers = market_data_request_vendor.tickers[ 0] if market_data_request.freq in ['tick', 'second']: data_frame = self.download_tick(market_data_request_vendor) else: data_frame = self.download_intraday(market_data_request_vendor) if data_frame is not None: if data_frame.empty: try: self.logger.info("No tickers returned for: " + market_data_request_vendor.tickers) except: pass return None cols = data_frame.columns.values import pytz try: data_frame = data_frame.tz_localize(pytz.utc) except: data_frame = data_frame.tz_convert(pytz.utc) cols = market_data_request.tickers[0] + "." + cols data_frame.columns = cols self.logger.info("Completed request from Bloomberg.") return data_frame def get_daily_data(self, market_data_request, market_data_request_vendor): data_frame = self.download_daily(market_data_request_vendor) # convert from vendor to findatapy tickers/fields if data_frame is not None: if data_frame.empty: self.logger.info("No tickers returned for...") try: self.logger.info(str(market_data_request_vendor.tickers)) except: pass return None returned_fields = data_frame.columns.get_level_values(0) returned_tickers = data_frame.columns.get_level_values(1) # TODO if empty try downloading again a year later try: fields = self.translate_from_vendor_field( returned_fields, market_data_request) except: print('t') tickers = self.translate_from_vendor_ticker( returned_tickers, market_data_request) ticker_combined = [] for i in range(0, len(fields)): ticker_combined.append(tickers[i] + "." 
+ fields[i]) data_frame.columns = ticker_combined data_frame.index.name = 'Date' return data_frame def get_reference_data(self, market_data_request_vendor, market_data_request): end = datetime.utcnow() from datetime import timedelta end = end + timedelta( days=365 ) # because very often we may wish to download data about future calendar events # end.replace(year = end.year + 1) market_data_request_vendor.finish_date = end self.logger.debug("Requesting ref for " + market_data_request_vendor.tickers[0] + " etc.") data_frame = self.download_ref(market_data_request_vendor) self.logger.debug("Waiting for ref...") # convert from vendor to findatapy tickers/fields if data_frame is not None: if data_frame.empty: return None returned_fields = data_frame.columns.get_level_values(0) returned_tickers = data_frame.columns.get_level_values(1) if data_frame is not None: # TODO if empty try downloading again a year later fields = self.translate_from_vendor_field(returned_fields, market_data_request) tickers = self.translate_from_vendor_ticker( returned_tickers, market_data_request) ticker_combined = [] for i in range(0, len(fields)): ticker_combined.append(tickers[i] + "." + fields[i]) data_frame.columns = ticker_combined # need to convert numerical and datetime columns separately post pandas 0.23 data_frame = data_frame.apply(pandas.to_numeric, errors='ignore') data_frame = data_frame.apply(pandas.to_datetime, errors='ignore') # TODO coerce will be deprecated from pandas 0.23.0 onwards so remove! # data_frame = data_frame.convert_objects(convert_dates = 'coerce', convert_numeric= 'coerce') return data_frame # implement method in abstract superclass @abc.abstractmethod def kill_session(self): return @abc.abstractmethod def download_tick(self, market_data_request): return @abc.abstractmethod def download_intraday(self, market_data_request): return @abc.abstractmethod def download_daily(self, market_data_request): return @abc.abstractmethod def download_ref(self, market_data_request): return
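# A minimal illustration (not from the library itself) of how load_ticker above splits a
# mixed field list into reference fields (fetched via a Bloomberg ReferenceDataRequest)
# and ordinary historical fields (fetched via a HistoricalDataRequest); the field values
# here are examples only.
fields = ['close', 'release-date-time-full']
vendor_fields = ['PX_LAST', 'ECO_FUTURE_RELEASE_DATE_LIST']

ref_fields = [f for f, v in zip(fields, vendor_fields)
              if f in DataVendorBBG.list_of_ref_fields
              or v in DataVendorBBG.list_of_ref_vendor_fields]
non_ref_fields = [f for f, v in zip(fields, vendor_fields)
                  if f not in DataVendorBBG.list_of_ref_fields
                  and v not in DataVendorBBG.list_of_ref_vendor_fields]

# ref_fields == ['release-date-time-full'], non_ref_fields == ['close']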
class BacktestRequest(MarketDataRequest): """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc. Used by TradingModel and Backtest to construct backtested returns for trading strategies """ def __init__(self): super(MarketDataRequest, self).__init__() self.logger = LoggerManager().getLogger(__name__) self.__signal_name = None self.__tech_params = TechParams() @property def signal_name(self): return self.__signal_name @signal_name.setter def signal_name(self, signal_name): self.__signal_name = signal_name @property def tech_params(self): return self.__tech_params @tech_params.setter def tech_params(self, tech_params): self.__tech_params = tech_params @property def spot_tc_bp(self): return self.__spot_tc_bp @spot_tc_bp.setter def spot_tc_bp(self, spot_tc_bp): self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0) @property def asset(self): return self.__asset @asset.setter def asset(self, asset): valid_asset = ['fx', 'multi-asset'] if not asset in valid_asset: self.logger.warning(asset + " is not a defined asset.") self.__asset = asset @property def instrument(self): return self.__instrument @instrument.setter def instrument(self, instrument): valid_instrument = ['spot', 'futures', 'options'] if not instrument in valid_instrument: self.logger.warning(instrument + " is not a defined trading instrument.") self.__instrument = instrument
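# A worked example of the spot_tc_bp setter above. Reading the divisor as "basis points
# to fraction (100 * 100), then halved per transaction leg" is an interpretation, not
# something stated in the library docs:
br = BacktestRequest()
br.spot_tc_bp = 20   # user enters a 20bp cost
# stored internally as 20 / (2.0 * 100.0 * 100.0) = 0.001, ie. 0.1% per leg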
class TradeAnalysis(object): """Applies some basic trade analysis for a trading strategy (as defined by TradingModel). Use PyFolio to create some basic trading statistics. Also allows you test multiple parameters for a specific strategy (like TC). """ def __init__(self, engine = ChartConstants().chartfactory_default_engine): self.logger = LoggerManager().getLogger(__name__) self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' ' self.SCALE_FACTOR = 3 self.DEFAULT_PLOT_ENGINE = engine self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE) return def run_strategy_returns_stats(self, trading_model, index = None, engine = 'pyfolio'): """Plots useful statistics for the trading strategy (using PyFolio) Parameters ---------- trading_model : TradingModel defining trading strategy index: DataFrame define strategy by a time series """ if index is None: pnl = trading_model.get_strategy_pnl() else: pnl = index tz = Timezone() calculations = Calculations() if engine == 'pyfolio': # PyFolio assumes UTC time based DataFrames (so force this localisation) try: pnl = tz.localise_index_as_UTC(pnl) except: pass # set the matplotlib style sheet & defaults # at present this only works in Matplotlib engine try: matplotlib.rcdefaults() plt.style.use(ChartConstants().chartfactory_style_sheet['chartpy-pyfolio']) except: pass # TODO for intraday strategies, make daily # convert DataFrame (assumed to have only one column) to Series pnl = calculations.calculate_returns(pnl) pnl = pnl.dropna() pnl = pnl[pnl.columns[0]] fig = pf.create_returns_tear_sheet(pnl, return_fig=True) try: plt.savefig (trading_model.DUMP_PATH + "stats.png") except: pass plt.show() elif engine == 'finmarketpy': # assume we have TradingModel # to do to take in a time series from chartpy import Canvas, Chart pnl = trading_model.plot_strategy_pnl(silent_plot=True) # plot the final strategy individual = trading_model.plot_strategy_group_pnl_trades(silent_plot=True) # plot the individual trade P&Ls pnl_comp = trading_model.plot_strategy_group_benchmark_pnl(silent_plot=True) # plot all the cumulative P&Ls of each component ir_comp = trading_model.plot_strategy_group_benchmark_pnl_ir(silent_plot=True) # plot all the IR of each component leverage = trading_model.plot_strategy_leverage(silent_plot=True) # plot the leverage of the portfolio ind_lev = trading_model.plot_strategy_group_leverage(silent_plot=True) # plot all the individual leverages canvas = Canvas([[pnl, individual], [pnl_comp, ir_comp], [leverage, ind_lev]] ) canvas.generate_canvas(silent_display=False, canvas_plotter='plain') def run_excel_trade_report(self, trading_model, excel_file = 'model.xlsx'): """ run_excel_trade_report - Creates an Excel spreadsheet with model returns and latest trades Parameters ---------- trading_model : TradingModel defining trading strategy (can be a list) """ trading_model_list = trading_model if not(isinstance(trading_model_list, list)): trading_model_list = [trading_model] writer = pandas.ExcelWriter(excel_file, engine='xlsxwriter') for tm in trading_model_list: strategy_name = tm.FINAL_STRATEGY returns = tm.get_strategy_group_benchmark_pnl() returns.to_excel(writer, sheet_name=strategy_name + ' rets', engine='xlsxwriter') # write raw position/trade sizes self.save_positions_trades(tm, tm.get_strategy_signal(),tm.get_strategy_trade(), 'pos', 'trades', writer) if hasattr(tm, '_strategy_signal_notional'): # write position/trade sizes scaled by notional self.save_positions_trades(tm, tm.get_strategy_signal_notional(), 
tm.get_strategy_trade_notional(), 'pos - Not', 'trades - Not', writer) if hasattr(tm, '_strategy_signal_contracts'): # write position/trade sizes in terms of contract sizes self.save_positions_trades(tm, tm.get_strategy_signal_contracts(), tm.get_strategy_trade_contracts(), 'pos - Cont', 'trades - Cont', writer) # TODO Add summary sheet comparing return statistics for all the different models in the list writer.save() writer.close() def save_positions_trades(self, tm, signals, trades, signal_caption, trade_caption, writer): signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' hist ' + signal_caption, engine='xlsxwriter') if hasattr(tm, 'STRIP'): strip = tm.STRIP recent_signals = tm.grab_signals(signals, date=[-1, -2, -5, -10, -20], strip=strip) recent_trades = tm.grab_signals(trades, date=[-1, -2, -5, -10, -20], strip=strip) recent_signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' ' + signal_caption, engine='xlsxwriter') recent_trades.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' ' + trade_caption, engine='xlsxwriter') def run_tc_shock(self, strategy, tc = None): if tc is None: tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0] parameter_list = [{'spot_tc_bp' : x } for x in tc] pretty_portfolio_names = [str(x) + 'bp' for x in tc] # names of the portfolio parameter_type = 'TC analysis' # broad type of parameter name return self.run_arbitrary_sensitivity(strategy, parameter_list=parameter_list, pretty_portfolio_names=pretty_portfolio_names, parameter_type=parameter_type) ###### Parameters and signal generations (need to be customised for every model) def run_arbitrary_sensitivity(self, trading_model, parameter_list = None, parameter_names = None, pretty_portfolio_names = None, parameter_type = None): asset_df, spot_df, spot_df2, basket_dict = trading_model.load_assets() port_list = None ret_stats_list = [] for i in range(0, len(parameter_list)): br = trading_model.load_parameters() current_parameter = parameter_list[i] # for calculating P&L for k in current_parameter.keys(): setattr(br, k, current_parameter[k]) trading_model.br = br # for calculating signals signal_df = trading_model.construct_signal(spot_df, spot_df2, br.tech_params, br) backtest = Backtest() self.logger.info("Calculating... " + str(pretty_portfolio_names[i])) backtest.calculate_trading_PnL(br, asset_df, signal_df) ret_stats_list.append(backtest.get_portfolio_pnl_ret_stats()) stats = str(backtest.get_portfolio_pnl_desc()[0]) port = backtest.get_cumportfolio().resample('B').mean() port.columns = [str(pretty_portfolio_names[i]) + ' ' + stats] if port_list is None: port_list = port else: port_list = port_list.join(port) # reset the parameters of the strategy trading_model.br = trading_model.load_parameters() style = Style() ir = [t.inforatio()[0] for t in ret_stats_list] # if we have too many combinations remove legend and use scaled shaded colour # if len(port_list) > 10: # style.color = 'Blues' # style.display_legend = False # plot all the variations style.resample = 'B' style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.png' style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.html' style.scale_factor = self.SCALE_FACTOR style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type self.chart.plot(port_list, chart_type='line', style=style) # plot all the IR in a bar chart form (can be easier to read!) 
style = Style() style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png' style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.html' style.scale_factor = self.SCALE_FACTOR style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type summary = pandas.DataFrame(index = pretty_portfolio_names, data = ir, columns = ['IR']) self.chart.plot(summary, chart_type='bar', style=style) return port_list ###### Parameters and signal generations (need to be customised for every model) ###### Plot all the output seperately def run_arbitrary_sensitivity_separately(self, trading_model, parameter_list = None, pretty_portfolio_names = None, strip = None): # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets() final_strategy = trading_model.FINAL_STRATEGY for i in range(0, len(parameter_list)): br = trading_model.fill_backtest_request() current_parameter = parameter_list[i] # for calculating P&L for k in current_parameter.keys(): setattr(br, k, current_parameter[k]) trading_model.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[i] self.logger.info("Calculating... " + pretty_portfolio_names[i]) trading_model.br = br trading_model.construct_strategy(br = br) trading_model.plot_strategy_pnl() trading_model.plot_strategy_leverage() trading_model.plot_strategy_group_benchmark_pnl(strip = strip) # reset the parameters of the strategy trading_model.br = trading_model.fill_backtest_request() trading_model.FINAL_STRATEGY = final_strategy def run_day_of_month_analysis(self, trading_model): from finmarketpy.economics.seasonality import Seasonality calculations = Calculations() seas = Seasonality() trading_model.construct_strategy() pnl = trading_model.get_strategy_pnl() # get seasonality by day of the month pnl = pnl.resample('B').mean() rets = calculations.calculate_returns(pnl) bus_day = seas.bus_day_of_month_seasonality(rets, add_average = True) # get seasonality by month pnl = pnl.resample('BM').mean() rets = calculations.calculate_returns(pnl) month = seas.monthly_seasonality(rets) self.logger.info("About to plot seasonality...") style = Style() # Plotting spot over day of month/month of year style.color = 'Blues' style.scale_factor = self.SCALE_FACTOR style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.png' style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.html' style.title = trading_model.FINAL_STRATEGY + ' day of month seasonality' style.display_legend = False style.color_2_series = [bus_day.columns[-1]] style.color_2 = ['red'] # red, pink style.linewidth_2 = 4 style.linewidth_2_series = [bus_day.columns[-1]] style.y_axis_2_series = [bus_day.columns[-1]] self.chart.plot(bus_day, chart_type='line', style=style) style = Style() style.scale_factor = self.SCALE_FACTOR style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.png' style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.html' style.title = trading_model.FINAL_STRATEGY + ' month of year seasonality' self.chart.plot(month, chart_type='line', style=style) return month
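# A hedged usage sketch for TradeAnalysis: MyTradingModel is a hypothetical TradingModel
# subclass and the tc values are arbitrary.
model = MyTradingModel()
model.construct_strategy()

ta = TradeAnalysis()
ta.run_strategy_returns_stats(model, engine='pyfolio')   # PyFolio tear sheet
ta.run_tc_shock(model, tc=[0, 0.5, 1.0, 2.0])            # sensitivity to transaction costs (bp)
ta.run_day_of_month_analysis(model)                      # seasonality of strategy returns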
class FXCrossFactory(object): def __init__(self, market_data_generator=None): self.logger = LoggerManager().getLogger(__name__) self.fxconv = FXConv() self.cache = {} self.calculations = Calculations() self.market_data_generator = market_data_generator return def flush_cache(self): self.cache = {} def get_fx_cross_tick(self, start, end, cross, cut="NYC", source="dukascopy", cache_algo='internet_load_return', type='spot', environment='backtest', fields=['bid', 'ask']): if isinstance(cross, str): cross = [cross] market_data_request = MarketDataRequest( gran_freq="tick", freq_mult=1, freq='tick', cut=cut, fields=['bid', 'ask', 'bidv', 'askv'], cache_algo=cache_algo, environment=environment, start_date=start, finish_date=end, data_source=source, category='fx') market_data_generator = self.market_data_generator data_frame_agg = None for cr in cross: if (type == 'spot'): market_data_request.tickers = cr cross_vals = market_data_generator.fetch_market_data( market_data_request) # if user only wants 'close' calculate that from the bid/ask fields if fields == ['close']: cross_vals = cross_vals[[cr + '.bid', cr + '.ask']].mean(axis=1) cross_vals.columns = [cr + '.close'] if data_frame_agg is None: data_frame_agg = cross_vals else: data_frame_agg = data_frame_agg.join(cross_vals, how='outer') # strip the nan elements data_frame_agg = data_frame_agg.dropna() return data_frame_agg def get_fx_cross(self, start, end, cross, cut="NYC", source="bloomberg", freq="intraday", cache_algo='internet_load_return', type='spot', environment='backtest', fields=['close']): if source == "gain" or source == 'dukascopy' or freq == 'tick': return self.get_fx_cross_tick(start, end, cross, cut=cut, source=source, cache_algo=cache_algo, type='spot', fields=fields) if isinstance(cross, str): cross = [cross] market_data_request_list = [] freq_list = [] type_list = [] for cr in cross: market_data_request = MarketDataRequest(freq_mult=1, cut=cut, fields=['close'], freq=freq, cache_algo=cache_algo, start_date=start, finish_date=end, data_source=source, environment=environment) market_data_request.type = type market_data_request.cross = cr if freq == 'intraday': market_data_request.gran_freq = "minute" # intraday elif freq == 'daily': market_data_request.gran_freq = "daily" # daily market_data_request_list.append(market_data_request) data_frame_agg = [] # depends on the nature of operation as to whether we should use threading or multiprocessing library if DataConstants().market_thread_technique is "thread": from multiprocessing.dummy import Pool else: # most of the time is spend waiting for Bloomberg to return, so can use threads rather than multiprocessing # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly # note: currently not very stable from multiprocessing_on_dill import Pool thread_no = DataConstants().market_thread_no['other'] if market_data_request_list[0].data_source in DataConstants( ).market_thread_no: thread_no = DataConstants().market_thread_no[ market_data_request_list[0].data_source] # fudge, issue with multithreading and accessing HDF5 files # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator': # thread_no = 0 if (thread_no > 0): pool = Pool(thread_no) # open the market data downloads in their own threads and return the results result = pool.map_async(self._get_individual_fx_cross, market_data_request_list) data_frame_agg = self.calculations.iterative_outer_join( result.get()) # data_frame_agg = self.calculations.pandas_outer_join(result.get()) # 
pool would have already been closed earlier # try: # pool.close() # pool.join() # except: pass else: for md_request in market_data_request_list: data_frame_agg.append( self._get_individual_fx_cross(md_request)) data_frame_agg = self.calculations.pandas_outer_join( data_frame_agg) # strip the nan elements data_frame_agg = data_frame_agg.dropna() return data_frame_agg def _get_individual_fx_cross(self, market_data_request): cr = market_data_request.cross type = market_data_request.type freq = market_data_request.freq base = cr[0:3] terms = cr[3:6] if (type == 'spot'): # non-USD crosses if base != 'USD' and terms != 'USD': base_USD = self.fxconv.correct_notation('USD' + base) terms_USD = self.fxconv.correct_notation('USD' + terms) # TODO check if the cross exists in the database # download base USD cross market_data_request.tickers = base_USD market_data_request.category = 'fx' if base_USD + '.close' in self.cache: base_vals = self.cache[base_USD + '.close'] else: base_vals = self.market_data_generator.fetch_market_data( market_data_request) self.cache[base_USD + '.close'] = base_vals # download terms USD cross market_data_request.tickers = terms_USD market_data_request.category = 'fx' if terms_USD + '.close' in self.cache: terms_vals = self.cache[terms_USD + '.close'] else: terms_vals = self.market_data_generator.fetch_market_data( market_data_request) self.cache[terms_USD + '.close'] = terms_vals # if quoted USD/base flip to get USD terms if (base_USD[0:3] == 'USD'): if 'USD' + base in '.close' in self.cache: base_vals = self.cache['USD' + base + '.close'] else: base_vals = 1 / base_vals self.cache['USD' + base + '.close'] = base_vals # if quoted USD/terms flip to get USD terms if (terms_USD[0:3] == 'USD'): if 'USD' + terms in '.close' in self.cache: terms_vals = self.cache['USD' + terms + '.close'] else: terms_vals = 1 / terms_vals self.cache['USD' + terms + '.close'] = base_vals base_vals.columns = ['temp'] terms_vals.columns = ['temp'] cross_vals = base_vals.div(terms_vals, axis='index') cross_vals.columns = [cr + '.close'] base_vals.columns = [base_USD + '.close'] terms_vals.columns = [terms_USD + '.close'] else: # if base == 'USD': non_USD = terms # if terms == 'USD': non_USD = base correct_cr = self.fxconv.correct_notation(cr) market_data_request.tickers = correct_cr market_data_request.category = 'fx' if correct_cr + '.close' in self.cache: cross_vals = self.cache[correct_cr + '.close'] else: cross_vals = self.market_data_generator.fetch_market_data( market_data_request) # flip if not convention if (correct_cr != cr): if cr + '.close' in self.cache: cross_vals = self.cache[cr + '.close'] else: cross_vals = 1 / cross_vals self.cache[cr + '.close'] = cross_vals self.cache[correct_cr + '.close'] = cross_vals # cross_vals = self.market_data_generator.harvest_time_series(market_data_request) cross_vals.columns.names = [cr + '.close'] elif type[0:3] == "tot": if freq == 'daily': # download base USD cross market_data_request.tickers = base + 'USD' market_data_request.category = 'fx-tot' if type == "tot": base_vals = self.market_data_generator.fetch_market_data( market_data_request) else: x = 0 # download terms USD cross market_data_request.tickers = terms + 'USD' market_data_request.category = 'fx-tot' if type == "tot": terms_vals = self.market_data_generator.fetch_market_data( market_data_request) else: pass base_rets = self.calculations.calculate_returns(base_vals) terms_rets = self.calculations.calculate_returns(terms_vals) cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0) # 
first returns of a time series will be NaN, given we don't know the previous point cross_rets.iloc[0] = 0 cross_vals = self.calculations.create_mult_index(cross_rets) cross_vals.columns = [cr + '-tot.close'] elif freq == 'intraday': self.logger.info( 'Total calculated returns for intraday not implemented yet' ) return None return cross_vals
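# A short usage sketch for FXCrossFactory; market_data_generator is assumed to be an
# already constructed market data generator object and the dates are placeholders.
fx_factory = FXCrossFactory(market_data_generator=market_data_generator)

eur_jpy = fx_factory.get_fx_cross(start='01 Jan 2015 00:00', end='01 Jan 2016 00:00',
                                  cross=['EURJPY'], cut='NYC', source='bloomberg',
                                  freq='daily', fields=['close'])
# EURJPY has no USD leg, so it is derived from the two USD legs, as in
# _get_individual_fx_cross above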
def free_form_tickers_query(self, free_form_query, best_match_only=False, list_query=False, ret_fields=["category", "data_source", "freq", "cut", "tickers", "vendor_tickers", "fields"], smart_group=True): """From a string or list of properties for predefined tickers, we create a DataFrame that can be used to populate a MarketDataRequest. We search through all the predefined tickers, and "guess" any matches to our query, without having to use the standard query format which consists of category.data_source.freq.cut.ticker such as this example fx.bloomberg.daily.NYC.EURUSD.close eg. quandl.fx will match all tickers which are from "quandl" and have a "category" fx We must be careful to make sure that categories, data_sources etc. are unique and do not overlap with other properties like tickers Parameters ---------- free_form_query : str A query that can be used to generate a MarketDataRequest eg. quandl.fx best_match_only : bool Only return at most 1 row of a DataFrame (default: False) list_query : bool Is this a list of tickers? ret_fields : str(list) Which properties of a MarketDataRequest to return smart_group : bool Smart group tickers of a particular category in a specific row Returns ------- DataFrame """ logger = LoggerManager().getLogger(__name__) logger.info( "Finding ticker combination which matches " + str(free_form_query)) df = ConfigManager._data_frame_time_series_tickers if list_query and isinstance(free_form_query, list): free_form_query = free_form_query elif "," in free_form_query: free_form_query = free_form_query.split(",") else: free_form_query = [free_form_query] df_joined_list = [] for key in free_form_query: df_joined = df key = ConfigManager.split_ticker_string(key) # Search through all the keywords, and see if matches with any # columns of our predefined tickers try: for k in key: for c in df.columns: try: df_temp = df_joined[df_joined[c] == k] except: df_temp = pd.DataFrame() if not (df_temp.empty): df_joined = df_temp break df_joined_list.append(df_joined) except Exception as e: pass # Drop any duplicated tickers df = pd.concat(df_joined_list).drop_duplicates() if len(df.index) > 1: logger.info( "Found multiple matches for ticker combination, first " "trying smart group...") if smart_group: df = self.smart_group_dataframe_tickers( df, ret_fields=ret_fields) if best_match_only: logger.info("Taking only top match...") df = pd.DataFrame(df.head(1)) if ret_fields is not None and not (df.empty): df = df[ret_fields] return df
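# A hedged usage sketch of free_form_tickers_query, assuming (as the references to
# ConfigManager in the body suggest) that it is called on a ConfigManager instance:
cm = ConfigManager().get_instance()

# match every predefined ticker from Quandl in the "fx" category
df_matches = cm.free_form_tickers_query("quandl.fx")

# or ask for the single best match of a fully specified query
df_best = cm.free_form_tickers_query("fx.bloomberg.daily.NYC.EURUSD", best_match_only=True)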
class BBGLowLevelRef(BBGLowLevelTemplate): def __init__(self): super(BBGLowLevelRef, self).__init__() self.logger = LoggerManager().getLogger(__name__) # populate options for Bloomberg request for asset intraday request def fill_options(self, market_data_request): options = OptionsBBG() options.security = market_data_request.tickers options.startDateTime = market_data_request.start_date options.endDateTime = market_data_request.finish_date options.fields = market_data_request.fields options.overrides = market_data_request.overrides return options def process_message(self, msg): data = collections.defaultdict(dict) # process received events securityDataArray = msg.getElement('securityData') index = 0 single = False for securityData in list(securityDataArray.values()): ticker = securityData.getElementAsString("security") fieldData = securityData.getElement("fieldData") for field in fieldData.elements(): if not field.isValid(): field_name = "%s" % field.name() self.logger.error(field_name + " is NULL") elif field.isArray(): # iterate over complex data returns. field_name = "%s" % field.name() for i, row in enumerate(field.values()): try: field_val = re.findall(r'"(.*?)"', "%s" % row)[0] except: e = row.getElement(0) # k = str(e.name()) field_val = e.getValue() data[(field_name, ticker)][index] = field_val index = index + 1 else: field_name = "%s" % field.name() data[(field_name, ticker)][0] = field.getValueAsString() index = index + 1 single = True # no need to create multi-index late, because just row!! CAREFUL!! needed for futures expiries fieldExceptionArray = securityData.getElement("fieldExceptions") for fieldException in list(fieldExceptionArray.values()): errorInfo = fieldException.getElement("errorInfo") print(errorInfo.getElementAsString("category"), ":", \ fieldException.getElementAsString("fieldId")) print("stop") # explicitly state from_dict (buggy if create pandas.DataFrame(data) data_frame = pandas.DataFrame.from_dict(data) # if obsolete ticker could return no values if (not (data_frame.empty)): # if not(single): # pass # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker']) self.logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1])) else: return None return data_frame def combine_slices(self, data_frame_cols, data_frame_slice): if (data_frame_slice.columns.get_level_values(1).values[0] not in data_frame_cols): # return data_frame.join(data_frame_slice, how="outer") return data_frame_slice return None # create request for data def send_bar_request(self, session, eventQueue, options, cid): refDataService = session.getService("//blp/refdata") request = refDataService.createRequest('ReferenceDataRequest') self.add_override(request, 'TIME_ZONE_OVERRIDE', 23) # force GMT time self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS', "Y") # include expired contracts self.add_override(request, 'START_DT', options.startDateTime.strftime('%Y%m%d')) self.add_override(request, 'END_DT', options.endDateTime.strftime('%Y%m%d')) # only one security/eventType per request for field in options.fields: request.getElement("fields").appendValue(field) for security in options.security: request.getElement("securities").appendValue(security) if options.overrides != {}: for k in options.overrides.keys(): new_k = k # is there a pretty name for this? 
if k in super().convert_override_fields: new_k = super().convert_override_fields[k] self.add_override(request, new_k, options.overrides[k]) self.logger.info("Sending Bloomberg Ref Request:" + str(request)) session.sendRequest(request=request, correlationId=cid)
class BacktestRequest(MarketDataRequest): """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc Used by TradingModel and Backtest to construct backtested returns for trading strategies """ def __init__(self): super(MarketDataRequest, self).__init__() self.logger = LoggerManager().getLogger(__name__) self.__signal_name = None # output parameters for backtest (should we add returns statistics on legends, write CSVs with returns etc.) self.__plot_start = None self.__calc_stats = True self.__write_csv = False self.__write_csv_pnl = False self.__plot_interim = False self.__include_benchmark = False self.__tech_params = TechParams() # default parameters for portfolio level vol adjustment self.__portfolio_vol_adjust = False self.__portfolio_vol_period_shift = 0 self.__portfolio_vol_rebalance_freq = None self.__portfolio_vol_resample_freq = None self.__portfolio_vol_resample_type = 'mean' self.__portfolio_vol_target = 0.1 # 10% vol target self.__portfolio_vol_max_leverage = None self.__portfolio_vol_periods = 20 self.__portfolio_vol_obs_in_year = 252 # default parameters for signal level vol adjustment self.__signal_vol_adjust = False self.__signal_vol_period_shift = 0 self.__signal_vol_rebalance_freq = None self.__signal_vol_resample_freq = None self.__signal_vol_resample_type = 'mean' self.__signal_vol_target = 0.1 # 10% vol target self.__signal_vol_max_leverage = None self.__signal_vol_periods = 20 self.__signal_vol_obs_in_year = 252 # portfolio notional size self.__portfolio_notional_size = None self.__portfolio_combination = None self.__portfolio_combination_weights = None # parameters for maximum position limits (expressed as whole portfolio) self.__max_net_exposure = None self.__max_abs_exposure = None self.__position_clip_rebalance_freq = None self.__position_clip_resample_freq = None # by default apply max position criterion on last business day of month self.__position_clip_resample_type = 'mean' self.__position_clip_period_shift = 0 # take profit and stop loss parameters self.__take_profit = None self.__stop_loss = None # should we delay the signal? 
self.__signal_delay = 0 ##### properties for output of the backtest @property def plot_start(self): return self.__plot_start @plot_start.setter def plot_start(self, plot_start): self.__plot_start = plot_start @property def calc_stats(self): return self.__calc_stats @calc_stats.setter def calc_stats(self, calc_stats): self.__calc_stats = calc_stats @property def write_csv(self): return self.__write_csv @write_csv.setter def write_csv(self, write_csv): self.__write_csv = write_csv @property def write_csv_pnl(self): return self.__write_csv_pnl @write_csv_pnl.setter def write_csv_pnl(self, write_csv_pnl): self.__write_csv_pnl = write_csv_pnl @property def plot_interim(self): return self.__plot_interim @plot_interim.setter def plot_interim(self, plot_interim): self.__plot_interim = plot_interim @property def include_benchmark(self): return self.__include_benchmark @include_benchmark.setter def include_benchmark(self, include_benchmark): self.__include_benchmark = include_benchmark ##### properties for portfolio level volatility adjustment @property def portfolio_vol_adjust(self): return self.__portfolio_vol_adjust @portfolio_vol_adjust.setter def portfolio_vol_adjust(self, portfolio_vol_adjust): self.__portfolio_vol_adjust = portfolio_vol_adjust @property def portfolio_vol_rebalance_freq(self): return self.__portfolio_vol_rebalance_freq @portfolio_vol_rebalance_freq.setter def portfolio_vol_rebalance_freq(self, portfolio_vol_rebalance_freq): self.__portfolio_vol_rebalance_freq = portfolio_vol_rebalance_freq @property def portfolio_vol_resample_type(self): return self.__portfolio_vol_resample_type @portfolio_vol_resample_type.setter def portfolio_vol_resample_type(self, portfolio_vol_resample_type): self.__portfolio_vol_resample_type = portfolio_vol_resample_type @property def portfolio_vol_resample_freq(self): return self.__portfolio_vol_resample_freq @portfolio_vol_resample_freq.setter def portfolio_vol_resample_freq(self, portfolio_vol_resample_freq): self.__portfolio_vol_resample_freq = portfolio_vol_resample_freq @property def portfolio_vol_period_shift(self): return self.__portfolio_vol_period_shift @portfolio_vol_period_shift.setter def portfolio_vol_period_shift(self, portfolio_vol_period_shift): self.__portfolio_vol_period_shift = portfolio_vol_period_shift @property def portfolio_vol_target(self): return self.__portfolio_vol_target @portfolio_vol_target.setter def portfolio_vol_target(self, portfolio_vol_target): self.__portfolio_vol_target = portfolio_vol_target @property def portfolio_vol_max_leverage(self): return self.__portfolio_vol_max_leverage @portfolio_vol_max_leverage.setter def portfolio_vol_max_leverage(self, portfolio_vol_max_leverage): self.__portfolio_vol_max_leverage = portfolio_vol_max_leverage @property def portfolio_vol_periods(self): return self.__portfolio_vol_periods @portfolio_vol_periods.setter def portfolio_vol_periods(self, portfolio_vol_periods): self.__portfolio_vol_periods = portfolio_vol_periods @property def portfolio_vol_obs_in_year(self): return self.__portfolio_vol_obs_in_year @portfolio_vol_obs_in_year.setter def portfolio_vol_obs_in_year(self, portfolio_vol_obs_in_year): self.__portfolio_vol_obs_in_year = portfolio_vol_obs_in_year ##### properties for signal level vol adjustment @property def signal_vol_adjust(self): return self.__signal_vol_adjust @signal_vol_adjust.setter def signal_vol_adjust(self, signal_vol_adjust): self.__signal_vol_adjust = signal_vol_adjust @property def signal_vol_rebalance_freq(self): return self.__signal_vol_rebalance_freq 
@signal_vol_rebalance_freq.setter def signal_vol_rebalance_freq(self, signal_vol_rebalance_freq): self.__signal_vol_rebalance_freq = signal_vol_rebalance_freq @property def signal_vol_resample_type(self): return self.__signal_vol_resample_type @signal_vol_resample_type.setter def signal_vol_resample_type(self, signal_vol_resample_type): self.__signal_vol_resample_type = signal_vol_resample_type @property def signal_vol_resample_freq(self): return self.__signal_vol_resample_freq @signal_vol_resample_freq.setter def signal_vol_resample_freq(self, signal_vol_resample_freq): self.__signal_vol_resample_freq = signal_vol_resample_freq @property def signal_vol_period_shift(self): return self.__signal_vol_period_shift @signal_vol_period_shift.setter def signal_vol_period_shift(self, signal_vol_period_shift): self.__signal_vol_period_shift = signal_vol_period_shift @property def signal_vol_target(self): return self.__signal_vol_target @signal_vol_target.setter def signal_vol_target(self, signal_vol_target): self.__signal_vol_target = signal_vol_target @property def signal_vol_max_leverage(self): return self.__signal_vol_max_leverage @signal_vol_max_leverage.setter def signal_vol_max_leverage(self, signal_vol_max_leverage): self.__signal_vol_max_leverage = signal_vol_max_leverage @property def signal_vol_periods(self): return self.__signal_vol_periods @signal_vol_periods.setter def signal_vol_periods(self, signal_vol_periods): self.__signal_vol_periods = signal_vol_periods @property def signal_vol_obs_in_year(self): return self.__signal_vol_obs_in_year @signal_vol_obs_in_year.setter def signal_vol_obs_in_year(self, signal_vol_obs_in_year): self.__signal_vol_obs_in_year = signal_vol_obs_in_year ##### portfolio notional size @property def portfolio_notional_size(self): return self.__portfolio_notional_size @portfolio_notional_size.setter def portfolio_notional_size(self, portfolio_notional_size): self.__portfolio_notional_size = float(portfolio_notional_size) ##### portfolio combination style (sum, mean, weighted, weighted-sum) @property def portfolio_combination(self): return self.__portfolio_combination @portfolio_combination.setter def portfolio_combination(self, portfolio_combination): self.__portfolio_combination = portfolio_combination ##### portfolio weights (sum, mean) @property def portfolio_combination_weights(self): return self.__portfolio_combination_weights @portfolio_combination_weights.setter def portfolio_combination_weights(self, portfolio_combination_weights): self.__portfolio_combination_weights = portfolio_combination_weights ##### properties for maximum position constraints @property def max_net_exposure(self): return self.__max_net_exposure @max_net_exposure.setter def max_net_exposure(self, max_net_exposure): self.__max_net_exposure = max_net_exposure @property def max_abs_exposure(self): return self.__max_abs_exposure @max_abs_exposure.setter def max_abs_exposure(self, max_abs_exposure): self.__max_abs_exposure = max_abs_exposure @property def position_clip_rebalance_freq(self): return self.__position_clip_rebalance_freq @position_clip_rebalance_freq.setter def position_clip_rebalance_freq(self, position_clip_rebalance_freq): self.__position_clip_rebalance_freq = position_clip_rebalance_freq @property def position_clip_resample_type(self): return self.__position_clip_resample_type @position_clip_resample_type.setter def position_clip_resample_type(self, position_clip_resample_type): self.__position_clip_resample_type = position_clip_resample_type @property def 
position_clip_resample_freq(self): return self.__position_clip_resample_freq @position_clip_resample_freq.setter def position_clip_resample_freq(self, position_clip_resample_freq): self.__position_clip_resample_freq = position_clip_resample_freq @property def position_clip_period_shift(self): return self.__position_clip_period_shift @position_clip_period_shift.setter def position_clip_period_shift(self, position_clip_period_shift): self.__position_clip_period_shift = position_clip_period_shift ##### stop loss and take profit @property def stop_loss(self): return self.__stop_loss @stop_loss.setter def stop_loss(self, stop_loss): self.__stop_loss = stop_loss @property def take_profit(self): return self.__take_profit @take_profit.setter def take_profit(self, take_profit): self.__take_profit = take_profit ##### tech indicators and spot bp tc @property def tech_params(self): return self.__tech_params @tech_params.setter def tech_params(self, tech_params): self.__tech_params = tech_params @property def spot_tc_bp(self): return self.__spot_tc_bp @spot_tc_bp.setter def spot_tc_bp(self, spot_tc_bp): self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0) #### FOR FUTURE USE ### @property def signal_name(self): return self.__signal_name @signal_name.setter def signal_name(self, signal_name): self.__signal_name = signal_name @property def asset(self): return self.__asset @asset.setter def asset(self, asset): valid_asset = ['fx', 'multi-asset'] if not asset in valid_asset: self.logger.warning(asset + " is not a defined asset.") self.__asset = asset @property def instrument(self): return self.__instrument @instrument.setter def instrument(self, instrument): valid_instrument = ['spot', 'futures', 'options'] if not instrument in valid_instrument: self.logger.warning(instrument + " is not a defined trading instrument.") self.__instrument = instrument @property def signal_delay(self): return self.__signal_delay @signal_delay.setter def signal_delay(self, signal_delay): self.__signal_delay = signal_delay
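# An illustrative configuration of BacktestRequest using only the properties defined
# above (all values are arbitrary):
br = BacktestRequest()
br.start_date = "01 Jan 2010"
br.finish_date = "01 Jan 2020"
br.spot_tc_bp = 0.5                  # transaction cost in bp (stored as a fraction)

br.signal_vol_adjust = True          # vol target each signal...
br.signal_vol_target = 0.05          # ...at 5% annualised vol

br.portfolio_vol_adjust = True       # and vol target the combined portfolio
br.portfolio_vol_target = 0.10       # at 10% annualised vol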
def __init__(self): # self.config = ConfigManager() self.logger = LoggerManager().getLogger(__name__) return
class BBGLowLevelIntraday(BBGLowLevelTemplate): def __init__(self): super(BBGLowLevelIntraday, self).__init__() self.logger = LoggerManager().getLogger(__name__) # constants self.BAR_DATA = blpapi.Name("barData") self.BAR_TICK_DATA = blpapi.Name("barTickData") self.OPEN = blpapi.Name("open") self.HIGH = blpapi.Name("high") self.LOW = blpapi.Name("low") self.CLOSE = blpapi.Name("close") self.VOLUME = blpapi.Name("volume") self.NUM_EVENTS = blpapi.Name("numEvents") self.TIME = blpapi.Name("time") def combine_slices(self, data_frame_cols, data_frame_slice): # return data_frame.append(data_frame_slice) return data_frame_slice # populate options for Bloomberg request for asset intraday request def fill_options(self, market_data_request): options = OptionsBBG() options.security = market_data_request.tickers[ 0] # get 1st ticker only! options.event = market_data_request.trade_side.upper() options.barInterval = market_data_request.freq_mult options.startDateTime = market_data_request.start_date options.endDateTime = market_data_request.finish_date options.gapFillInitialBar = False options.overrides = market_data_request.overrides if hasattr(options.startDateTime, 'microsecond'): options.startDateTime = options.startDateTime.replace( microsecond=0) if hasattr(options.endDateTime, 'microsecond'): options.endDateTime = options.endDateTime.replace(microsecond=0) return options # iterate through Bloomberg output creating a DataFrame output # implements abstract method def process_message(self, msg): data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA) # self.logger.info("Processing intraday data for " + str(self._options.security)) data_vals = list(data.values()) # data_matrix = numpy.zeros([len(data_vals), 6]) # data_matrix.fill(numpy.nan) # # date_index = [None] * len(data_vals) # # for i in range(0, len(data_vals)): # data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN) # data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH) # data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW) # data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE) # data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME) # data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS) # # date_index[i] = data_vals[i].getElementAsDatetime(self.TIME) # # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1])) # # # create pandas dataframe with the Bloomberg output # return pandas.DataFrame(data = data_matrix, index = date_index, # columns=['open', 'high', 'low', 'close', 'volume', 'events']) ## for loop method is touch slower # time_list = [] # data_table = [] # for bar in data_vals: # data_table.append([bar.getElementAsFloat(self.OPEN), # bar.getElementAsFloat(self.HIGH), # bar.getElementAsFloat(self.LOW), # bar.getElementAsFloat(self.CLOSE), # bar.getElementAsInteger(self.VOLUME), # bar.getElementAsInteger(self.NUM_EVENTS)]) # # time_list.append(bar.getElementAsDatetime(self.TIME)) # each price time point has multiple fields - marginally quicker tuple = [([ bar.getElementAsFloat(self.OPEN), bar.getElementAsFloat(self.HIGH), bar.getElementAsFloat(self.LOW), bar.getElementAsFloat(self.CLOSE), bar.getElementAsInteger(self.VOLUME), bar.getElementAsInteger(self.NUM_EVENTS) ], bar.getElementAsDatetime(self.TIME)) for bar in data_vals] data_table = list(map(itemgetter(0), tuple)) time_list = list(map(itemgetter(1), tuple)) try: self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1])) except: self.logger.info("No 
dates retrieved") return None # create pandas dataframe with the Bloomberg output return pandas.DataFrame( data=data_table, index=time_list, columns=['open', 'high', 'low', 'close', 'volume', 'events']) # implement abstract method: create request for data def send_bar_request(self, session, eventQueue, options, cid): refDataService = session.getService("//blp/refdata") request = refDataService.createRequest("IntradayBarRequest") # only one security/eventType per request request.set("security", options.security) request.set("eventType", options.event) request.set("interval", options.barInterval) # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT') if options.startDateTime and options.endDateTime: request.set("startDateTime", options.startDateTime) request.set("endDateTime", options.endDateTime) if options.gapFillInitialBar: request.append("gapFillInitialBar", True) self.logger.info("Sending Intraday Bloomberg Request...") session.sendRequest(request=request, correlationId=cid)
def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1', db_port='6379', timeout=10, username=None, password=None): logger = LoggerManager().getLogger(__name__) if 'hdf5' in engine: engine = 'hdf5' if (engine == 'bcolz'): # convert invalid characters to substitutes (which Bcolz can't deal with) pass elif (engine == 'redis'): fname = os.path.basename(fname).replace('.', '_') try: r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout, socket_connect_timeout=timeout) if (fname == 'flush_all_keys'): r.flushall() else: # allow deletion of keys by pattern matching x = r.keys('*' + fname) if len(x) > 0: r.delete(*x) # r.delete(fname) except Exception as e: logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e)) elif (engine == 'arctic'): from arctic import Arctic import pymongo socketTimeoutMS = 30 * 1000 fname = os.path.basename(fname).replace('.', '_') logger.info('Load MongoDB library: ' + fname) if username is not None and password is not None: c = pymongo.MongoClient( host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port), connect=False) # , username=username, password=password) else: c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS, connectTimeoutMS=socketTimeoutMS) store.delete_library(fname) c.close() logger.info("Deleted MongoDB library: " + fname) elif (engine == 'hdf5'): h5_filename = self.get_h5_filename(fname) # delete the old copy try: os.remove(h5_filename) except: pass
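# A hedged usage sketch for remove_time_series_cache_on_disk; IOEngine is a hypothetical
# name for the containing class and the key/file names are placeholders.
io = IOEngine()

# delete matching keys from a local Redis cache
io.remove_time_series_cache_on_disk('backtest.fx.bloomberg.daily.NYC.EURUSD',
                                    engine='redis', db_server='127.0.0.1', db_port='6379')

# delete a cached HDF5 file
io.remove_time_series_cache_on_disk('fx_daily_cache', engine='hdf5')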
class MarketDataRequest(object): """Provides parameters for requesting market data. Includes parameters to define the ticker we'd like to fetch, the start and finish dates for our request, as well as the various fields we would like and also the frequency of the data. """ # properties # # data_source eg. bbg, yahoo, quandl # start_date # finish_date # tickers (can be list) eg. EURUSD # category (eg. fx, equities, fixed_income, cal_event, fundamental) # freq_mult (eg. 1) # freq (tick, intraday or daily) # gran_freq (minute, daily, hourly, daily, weekly, monthly, yearly) # fields (can be list) # vendor_tickers (optional) # vendor_fields (optional) # cache_algo (eg. internet, disk, memory) - internet will forcibly download from the internet # abstract_curve (optional) # environment (eg. prod, backtest) - old data is saved with prod, backtest will overwrite the last data point # overrides (optional) - if you need to specify any data overrides (eg. for BBG) def generate_key(self): """Generate a key to describe this MarketDataRequest object, which can be used in a cache, as a hash-style key Returns ------- str Key to describe this MarketDataRequest """ from findatapy.market.ioengine import SpeedCache if self.freq == 'daily': ticker = None else: ticker = self.tickers[0] self.__category_key = self.create_category_key(self, ticker=ticker) return SpeedCache().generate_key(self, [ 'logger', '_MarketDataRequest__abstract_curve', '_MarketDataRequest__cache_algo', '_MarketDataRequest__overrides' ]) def __init__(self, data_source=None, start_date='year', finish_date=datetime.datetime.utcnow(), tickers=None, category=None, freq_mult=1, freq="daily", gran_freq=None, cut="NYC", fields=['close'], cache_algo="internet_load_return", vendor_tickers=None, vendor_fields=None, environment="backtest", trade_side='trade', expiry_date=None, md_request=None, abstract_curve=None, overrides={}): self.logger = LoggerManager().getLogger(__name__) # can deep copy MarketDataRequest (use a lock, so can be used with threading when downloading time series) if md_request is not None: import threading lock = threading.Lock() with lock: import copy self.freq_mult = copy.deepcopy(md_request.freq_mult) # define frequency of data self.gran_freq = copy.deepcopy(md_request.gran_freq) self.freq_mult = copy.deepcopy(md_request.freq_mult) self.freq = copy.deepcopy(md_request.freq) # data source, start and fin self.data_source = copy.deepcopy(md_request.data_source) self.start_date = copy.deepcopy(md_request.start_date) self.finish_date = copy.deepcopy(md_request.finish_date) self.category = copy.deepcopy( md_request.category) # special predefined categories self.cut = copy.deepcopy( md_request.cut ) # closing time of the data (eg. NYC, LDN, TOK etc) self.fields = copy.deepcopy( md_request.fields) # fields, eg. 
close, high, low, open self.cache_algo = copy.deepcopy( md_request.cache_algo ) # internet_load_return (cache_algo_return is for future use) self.vendor_tickers = copy.deepcopy( md_request.vendor_tickers) # define vendor tickers self.vendor_fields = copy.deepcopy( md_request.vendor_fields) # define vendor fields self.environment = copy.deepcopy( md_request.environment ) # backtest environment only supported at present self.trade_side = copy.deepcopy(md_request.trade_side) self.expiry_date = copy.deepcopy(md_request.expiry_date) # self.abstract_curve = copy.deepcopy(md_request.abstract_curve) self.overrides = copy.deepcopy(md_request.overrides) self.tickers = copy.deepcopy( md_request.tickers ) # need this after category in case have wildcard else: self.freq_mult = freq_mult # define frequency of data self.gran_freq = gran_freq self.freq_mult = freq_mult self.freq = freq # data source, start and fin self.data_source = data_source self.start_date = start_date self.finish_date = finish_date self.category = category # special predefined categories self.cut = cut # closing time of the data (eg. NYC, LDN, TOK etc) self.fields = fields # fields, eg. close, high, low, open self.cache_algo = cache_algo # internet_load_return (cache_algo_return is for future use) self.vendor_tickers = vendor_tickers # define vendor tickers self.vendor_fields = vendor_fields # define vendor fields self.environment = environment # backtest environment only supported at present self.trade_side = trade_side self.expiry_date = expiry_date self.abstract_curve = abstract_curve self.overrides = overrides self.tickers = tickers def create_category_key(self, market_data_request, ticker=None): """Returns a category key for the associated MarketDataRequest, which can be used to create filenames (or as part of a storage key in a cache) Parameters ---------- market_data_request : MarketDataRequest contains various properties describing time series to fetched, including ticker, start & finish date etc. Returns ------- str """ category = 'default-cat' cut = 'default-cut' if market_data_request.category is not None: category = market_data_request.category environment = market_data_request.environment source = market_data_request.data_source freq = market_data_request.freq if market_data_request.cut is not None: cut = market_data_request.cut if (ticker is not None): key = str(environment) + "." + str(category) + '.' + str(source) + '.' + str(freq) + '.' + str(cut) \ + '.' + str(ticker) else: key = str(environment) + "." + str(category) + '.' + str( source) + '.' + str(freq) + '.' 
+ str(cut) return key @property def data_source(self): return self.__data_source @data_source.setter def data_source(self, data_source): try: valid_data_source = [ 'ats', 'bloomberg', 'dukascopy', 'fred', 'gain', 'google', 'quandl', 'yahoo' ] if not data_source in valid_data_source: self.logger.warning(data_source & " is not a defined data source.") except: pass self.__data_source = data_source @property def category(self): return self.__category @category.setter def category(self, category): self.__category = category @property def tickers(self): return self.__tickers @tickers.setter def tickers(self, tickers): if tickers is not None: if not isinstance(tickers, list): tickers = [tickers] config = None new_tickers = [] if tickers is not None: for tick in tickers: if '*' in tick: start = '' if tick[-1] == "*" and tick[0] != "*": start = "^" tick = start + "(" + tick.replace('*', '') + ")" if config is None: from findatapy.util import ConfigManager config = ConfigManager().get_instance() new_tickers.append( config.get_filtered_tickers_list_for_category( self.__category, self.__data_source, self.__freq, self.__cut, tick)) else: new_tickers.append(tick) new_tickers = self._flatten_list(new_tickers) self.__tickers = new_tickers else: self.__tickers = tickers @property def fields(self): return self.__fields @fields.setter def fields(self, fields): valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents'] if not isinstance(fields, list): fields = [fields] for field_entry in fields: if not field_entry in valid_fields: i = 0 # self.logger.warning(field_entry + " is not a valid field.") # add error checking self.__fields = fields @property def vendor_tickers(self): return self.__vendor_tickers @vendor_tickers.setter def vendor_tickers(self, vendor_tickers): if vendor_tickers is not None: if not isinstance(vendor_tickers, list): vendor_tickers = [vendor_tickers] self.__vendor_tickers = vendor_tickers @property def vendor_fields(self): return self.__vendor_fields @vendor_fields.setter def vendor_fields(self, vendor_fields): if vendor_fields is not None: if not isinstance(vendor_fields, list): vendor_fields = [vendor_fields] self.__vendor_fields = vendor_fields @property def freq(self): return self.__freq @freq.setter def freq(self, freq): freq = freq.lower() valid_freq = [ 'tick', 'second', 'minute', 'intraday', 'hourly', 'daily', 'weekly', 'monthly', 'quarterly', 'annually' ] if not freq in valid_freq: self.logger.warning(freq + " is not a defined frequency") self.__freq = freq @property def gran_freq(self): return self.__gran_freq @gran_freq.setter def gran_freq(self, gran_freq): try: gran_freq = gran_freq.lower() valid_gran_freq = [ 'tick', 'second', 'minute', 'hourly', 'pseudodaily', 'daily', 'weekly', 'monthly', 'quarterly', 'annually' ] if not gran_freq in valid_gran_freq: self.logger.warning(gran_freq & " is not a defined frequency") if gran_freq in ['minute', 'hourly']: self.__freq = 'intraday' elif gran_freq in ['tick', 'second']: self.__freq = 'tick' else: self.__freq = 'daily' except: pass self.__gran_freq = gran_freq @property def freq_mult(self): return self.__freq_mult @freq_mult.setter def freq_mult(self, freq_mult): self.__freq_mult = freq_mult @property def start_date(self): return self.__start_date @start_date.setter def start_date(self, start_date): self.__start_date = self.date_parser(start_date) @property def finish_date(self): return self.__finish_date @finish_date.setter def finish_date(self, finish_date): self.__finish_date = self.date_parser(finish_date) @property 
def cut(self): return self.__cut @cut.setter def cut(self, cut): self.__cut = cut def date_parser(self, date): if isinstance(date, str): date1 = datetime.datetime.utcnow() if date is 'midnight': date1 = datetime.datetime(date1.year, date1.month, date1.day, 0, 0, 0) elif date is 'decade': date1 = date1 - timedelta(days=365 * 10) elif date is 'year': date1 = date1 - timedelta(days=365) elif date is 'month': date1 = date1 - timedelta(days=30) elif date is 'week': date1 = date1 - timedelta(days=7) elif date is 'day': date1 = date1 - timedelta(days=1) elif date is 'hour': date1 = date1 - timedelta(hours=1) else: # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M' try: date1 = datetime.datetime.strptime(date, '%b %d %Y %H:%M') except: # self.logger.warning("Attempted to parse date") i = 0 # format expected '1 Jun 2005 01:33', '%d %b %Y %H:%M' try: date1 = datetime.datetime.strptime(date, '%d %b %Y %H:%M') except: # self.logger.warning("Attempted to parse date") i = 0 try: date1 = datetime.datetime.strptime(date, '%b %d %Y') except: # self.logger.warning("Attempted to parse date") i = 0 try: date1 = datetime.datetime.strptime(date, '%d %b %Y') except: # self.logger.warning("Attempted to parse date") i = 0 else: import pandas date1 = pandas.Timestamp(date) return date1 @property def cache_algo(self): return self.__cache_algo @cache_algo.setter def cache_algo(self, cache_algo): cache_algo = cache_algo.lower() valid_cache_algo = [ 'internet_load', 'internet_load_return', 'cache_algo', 'cache_algo_return' ] if not cache_algo in valid_cache_algo: self.logger.warning(cache_algo + " is not a defined caching scheme") self.__cache_algo = cache_algo @property def environment(self): return self.__environment @environment.setter def environment(self, environment): environment = environment.lower() valid_environment = ['prod', 'backtest'] if not environment in valid_environment: self.logger.warning(environment + " is not a defined environment.") self.__environment = environment @property def trade_side(self): return self.__trade_side @trade_side.setter def trade_side(self, trade_side): trade_side = trade_side.lower() valid_trade_side = ['trade', 'bid', 'ask'] if not trade_side in valid_trade_side: self.logger.warning(trade_side + " is not a defined trade side.") self.__trade_side = trade_side @property def expiry_date(self): return self.__expiry_date @expiry_date.setter def expiry_date(self, expiry_date): self.__expiry_date = self.date_parser(expiry_date) @property def abstract_curve(self): return self.__abstract_curve @abstract_curve.setter def abstract_curve(self, abstract_curve): if abstract_curve is not None: self.__abstract_curve_key = abstract_curve.generate_key() else: self.__abstract_curve_key = None self.__abstract_curve = abstract_curve @property def overrides(self): return self.__overrides @overrides.setter def overrides(self, overrides): self.__overrides = overrides def _flatten_list(self, list_of_lists): """Flattens list, particularly useful for combining baskets Parameters ---------- list_of_lists : str (list) List to be flattened Returns ------- """ result = [] for i in list_of_lists: # Only append if i is a basestring (superclass of string) if isinstance(i, str): result.append(i) # Otherwise call this function recursively else: result.extend(self._flatten_list(i)) return result
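# The start_date/finish_date setters route through date_parser, so relative keywords and
# several explicit formats are both accepted; a short illustration (relative values are
# measured back from the current UTC time):
md_request = MarketDataRequest()
md_request.start_date = 'month'               # roughly 30 days ago
md_request.finish_date = 'midnight'           # today at 00:00
md_request.start_date = '01 Jun 2005 01:33'   # '%d %b %Y %H:%M'
md_request.start_date = 'Jun 1 2005'          # '%b %d %Y'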
def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None, db_server=constants.db_server, db_port=constants.db_port, username=constants.db_username, password=constants.db_password): """Reads time series cache from disk in either HDF5 or bcolz Parameters ---------- fname : str (or list) file to be read from engine : str (optional) 'hd5' - reads HDF5 files (default) 'arctic' - reads from Arctic/MongoDB database 'bcolz' - reads from bcolz file (not fully implemented) 'parquet' - reads from Parquet start_date : str/datetime (optional) Start date finish_date : str/datetime (optional) Finish data db_server : str IP address of MongdDB (default '127.0.0.1') Returns ------- DataFrame """ logger = LoggerManager.getLogger(__name__) data_frame_list = [] if not (isinstance(fname, list)): if '*' in fname: fname = glob.glob(fname) else: fname = [fname] for fname_single in fname: logger.debug("Reading " + fname_single + "..") if engine == 'parquet' and '.gzip' not in fname_single and '.parquet' not in fname_single: fname_single = fname_single + '.parquet' if (engine == 'bcolz'): try: name = self.get_bcolz_filename(fname_single) zlens = bcolz.open(rootdir=name) data_frame = zlens.todataframe() data_frame.index = pandas.DatetimeIndex(data_frame['DTS_']) data_frame.index.name = 'Date' del data_frame['DTS_'] # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas data_frame.columns = self.find_replace_chars( data_frame.columns, _replace_chars, _invalid_chars) data_frame.columns = [x[2:] for x in data_frame.columns] except: data_frame = None elif (engine == 'redis'): fname_single = os.path.basename(fname_single).replace('.', '_') msg = None try: # for pyarrow context = pa.default_serialization_context() r = redis.StrictRedis(host=db_server, port=db_port, db=0) # is there a compressed key stored?) 
k = r.keys('comp_*_' + fname_single) # if so, then it means that we have stored it as a compressed object # if have more than 1 element, take the last (which will be the latest to be added) if (len(k) >= 1): k = k[-1].decode('utf-8') comp = r.get(k) siz = int(k.split('_')[1]) dec = pa.decompress(comp, codec='lz4', decompressed_size=siz) msg = context.deserialize(dec) else: msg = r.get(fname_single) # print(fname_single) if msg is not None: msg = context.deserialize(msg) # logger.warning("Key " + fname_single + " not in Redis cache?") except Exception as e: logger.info("Cache not existent for " + fname_single + " in Redis: " + str(e)) if msg is None: data_frame = None else: logger.info('Load Redis cache: ' + fname_single) data_frame = msg # pandas.read_msgpack(msg) elif (engine == 'arctic'): socketTimeoutMS = 2 * 1000 import pymongo from arctic import Arctic fname_single = os.path.basename(fname_single).replace('.', '_') logger.info('Load Arctic/MongoDB library: ' + fname_single) if username is not None and password is not None: c = pymongo.MongoClient( host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port), connect=False ) # , username=username, password=password) else: c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS) # Access the library try: library = store[fname_single] if start_date is None and finish_date is None: item = library.read(fname_single) else: from arctic.date import DateRange item = library.read( fname_single, date_range=DateRange( start_date.replace(tzinfo=None), finish_date.replace(tzinfo=None))) c.close() logger.info('Read ' + fname_single) data_frame = item.data except Exception as e: logger.warning('Library may not exist or another error: ' + fname_single + ' & message is ' + str(e)) data_frame = None elif self.path_exists(self.get_h5_filename(fname_single)): store = pandas.HDFStore(self.get_h5_filename(fname_single)) data_frame = store.select("data") if ('intraday' in fname_single): data_frame = data_frame.astype('float32') store.close() elif self.path_exists(fname_single) and '.csv' in fname_single: data_frame = pandas.read_csv(fname_single, index_col=0) data_frame.index = pd.to_datetime(data_frame.index) elif self.path_exists(fname_single): data_frame = self.read_parquet(fname_single) # data_frame = pandas.read_parquet(fname_single) data_frame_list.append(data_frame) if len(data_frame_list) == 1: return data_frame_list[0] return data_frame_list
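# --- Illustrative usage sketch (not part of the original library source) ---
# Round trip through the on-disk cache: write a DataFrame as Parquet, then
# read it back with the method above. Assumes IOEngine lives in
# findatapy.market.ioengine and that pyarrow/Parquet support is installed;
# the file name is hypothetical.
import pandas as pd

from findatapy.market.ioengine import IOEngine

df = pd.DataFrame({'EURUSD.close': [1.10, 1.11, 1.12]},
                  index=pd.date_range('2020-01-01', periods=3))

io = IOEngine()
io.write_time_series_cache_to_disk('eurusd_cache.gzip', df, engine='parquet')
df_read = io.read_time_series_cache_from_disk('eurusd_cache.gzip', engine='parquet')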
def __init__(self, data_source=None, start_date='year', finish_date=datetime.datetime.utcnow(), tickers=None, category=None, freq_mult=1, freq="daily", gran_freq=None, cut="NYC", fields=['close'], cache_algo="internet_load_return", vendor_tickers=None, vendor_fields=None, environment="backtest", trade_side='trade', expiry_date=None, md_request=None, abstract_curve=None, overrides={}): self.logger = LoggerManager().getLogger(__name__) # can deep copy MarketDataRequest (use a lock, so can be used with threading when downloading time series) if md_request is not None: import threading lock = threading.Lock() with lock: import copy self.freq_mult = copy.deepcopy(md_request.freq_mult) # define frequency of data self.gran_freq = copy.deepcopy(md_request.gran_freq) self.freq_mult = copy.deepcopy(md_request.freq_mult) self.freq = copy.deepcopy(md_request.freq) # data source, start and fin self.data_source = copy.deepcopy(md_request.data_source) self.start_date = copy.deepcopy(md_request.start_date) self.finish_date = copy.deepcopy(md_request.finish_date) self.category = copy.deepcopy( md_request.category) # special predefined categories self.cut = copy.deepcopy( md_request.cut ) # closing time of the data (eg. NYC, LDN, TOK etc) self.fields = copy.deepcopy( md_request.fields) # fields, eg. close, high, low, open self.cache_algo = copy.deepcopy( md_request.cache_algo ) # internet_load_return (cache_algo_return is for future use) self.vendor_tickers = copy.deepcopy( md_request.vendor_tickers) # define vendor tickers self.vendor_fields = copy.deepcopy( md_request.vendor_fields) # define vendor fields self.environment = copy.deepcopy( md_request.environment ) # backtest environment only supported at present self.trade_side = copy.deepcopy(md_request.trade_side) self.expiry_date = copy.deepcopy(md_request.expiry_date) # self.abstract_curve = copy.deepcopy(md_request.abstract_curve) self.overrides = copy.deepcopy(md_request.overrides) self.tickers = copy.deepcopy( md_request.tickers ) # need this after category in case have wildcard else: self.freq_mult = freq_mult # define frequency of data self.gran_freq = gran_freq self.freq_mult = freq_mult self.freq = freq # data source, start and fin self.data_source = data_source self.start_date = start_date self.finish_date = finish_date self.category = category # special predefined categories self.cut = cut # closing time of the data (eg. NYC, LDN, TOK etc) self.fields = fields # fields, eg. close, high, low, open self.cache_algo = cache_algo # internet_load_return (cache_algo_return is for future use) self.vendor_tickers = vendor_tickers # define vendor tickers self.vendor_fields = vendor_fields # define vendor fields self.environment = environment # backtest environment only supported at present self.trade_side = trade_side self.expiry_date = expiry_date self.abstract_curve = abstract_curve self.overrides = overrides self.tickers = tickers
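# --- Illustrative usage sketch (not part of the original library source) ---
# Construct a request, then clone it via the md_request deep-copy branch of
# the constructor above, which is handy when downloading several tickers on
# different threads; the ticker values are only examples.
base_request = MarketDataRequest(start_date='month',
                                 data_source='quandl',
                                 tickers=['EURUSD'],
                                 fields=['close'],
                                 vendor_tickers=['FRED/DEXUSEU'],
                                 vendor_fields=['close'])

cloned_request = MarketDataRequest(md_request=base_request)
cloned_request.tickers = ['USDJPY']
cloned_request.vendor_tickers = ['FRED/DEXJPUS']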
def process_message(self, msg): data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA) logger = LoggerManager().getLogger(__name__) # logger.info("Processing intraday data for " + str(self._options.security)) data_vals = list(data.values()) # data_matrix = numpy.zeros([len(data_vals), 6]) # data_matrix.fill(numpy.nan) # # date_index = [None] * len(data_vals) # # for i in range(0, len(data_vals)): # data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN) # data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH) # data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW) # data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE) # data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME) # data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS) # # date_index[i] = data_vals[i].getElementAsDatetime(self.TIME) # # logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1])) # # # create pandas dataframe with the Bloomberg output # return pd.DataFrame(data = data_matrix, index = date_index, # columns=['open', 'high', 'low', 'close', 'volume', 'events']) ## for loop method is touch slower # time_list = [] # data_table = [] # for bar in data_vals: # data_table.append([bar.getElementAsFloat(self.OPEN), # bar.getElementAsFloat(self.HIGH), # bar.getElementAsFloat(self.LOW), # bar.getElementAsFloat(self.CLOSE), # bar.getElementAsInteger(self.VOLUME), # bar.getElementAsInteger(self.NUM_EVENTS)]) # # time_list.append(bar.getElementAsDatetime(self.TIME)) # each price time point has multiple fields - marginally quicker tuple = [([ bar.getElementAsFloat(self.OPEN), bar.getElementAsFloat(self.HIGH), bar.getElementAsFloat(self.LOW), bar.getElementAsFloat(self.CLOSE), bar.getElementAsInteger(self.VOLUME), bar.getElementAsInteger(self.NUM_EVENTS) ], bar.getElementAsDatetime(self.TIME)) for bar in data_vals] data_table = list(map(itemgetter(0), tuple)) time_list = list(map(itemgetter(1), tuple)) try: logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1])) except: logger.info("No dates retrieved") return None # create pandas dataframe with the Bloomberg output return pd.DataFrame( data=data_table, index=time_list, columns=['open', 'high', 'low', 'close', 'volume', 'events'])
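# --- Illustrative sketch of the "build tuples, then unzip" pattern above ---
# (standalone example, independent of blpapi; the bar values are made up)
from operator import itemgetter

import pandas as pd

bars = [((1.10, 1.12, 1.09, 1.11), '2020-01-01 10:00'),
        ((1.11, 1.13, 1.10, 1.12), '2020-01-01 10:01')]

# split the list of (values, timestamp) tuples into two aligned lists
data_table = list(map(itemgetter(0), bars))
time_list = list(map(itemgetter(1), bars))

df = pd.DataFrame(data=data_table, index=pd.to_datetime(time_list),
                  columns=['open', 'high', 'low', 'close'])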
def __init__(self): self.logger = LoggerManager().getLogger(__name__) self._techind = None self._signal = None
def load_time_series(self, market_data_request): # if(BBGLowLevelTemplate._session is None): logger = LoggerManager().getLogger(__name__) session = self.start_bloomberg_session() # else: # session = BBGLowLevelTemplate._session try: # if can't open the session, kill existing one # then try reopen (up to 5 times...) i = 0 while i < 5: if session is not None: if not session.openService("//blp/refdata"): logger.info("Try reopening Bloomberg session... try " + str(i)) self.kill_session( session ) # need to forcibly kill_session since can't always reopen session = self.start_bloomberg_session() if session is not None: if session.openService("//blp/refdata"): i = 6 else: logger.info("Try opening Bloomberg session... try " + str(i)) session = self.start_bloomberg_session() i = i + 1 # give error if still doesn't work after several tries.. if not session.openService("//blp/refdata"): logger.error("Failed to open //blp/refdata") return logger.info("Creating request...") eventQueue = blpapi.EventQueue() # eventQueue = None # create a request from blpapi import CorrelationId cid = CorrelationId() options = self.fill_options(market_data_request) if options.security is not None: self.send_bar_request(session, eventQueue, options, cid) logger.info("Waiting for data to be returned...") data_frame = self.event_loop(session) else: logger.warn("No ticker or field specified!") data_frame = None finally: # stop the session (will fail if NoneType) try: session.stop() except: pass return data_frame
def process_message(self, msg):
        logger = LoggerManager().getLogger(__name__)

        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0
        single = False

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        try:
                            field_val = re.findall(r'"(.*?)"', "%s" % row)[0]
                        except:
                            e = row.getElement(0)
                            # k = str(e.name())
                            field_val = e.getValue()

                        data[(field_name, ticker)][index] = field_val

                        index = index + 1
                else:
                    field_name = "%s" % field.name()

                    data[(field_name, ticker)][0] = field.getValueAsString()

                    index = index + 1

                    # only a single row here, so no need to create a multi-index later
                    # (careful: this flag is needed for futures expiries)
                    single = True

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")

                logger.warning(errorInfo.getElementAsString("category") + ": " +
                               fieldException.getElementAsString("fieldId"))

        # explicitly use from_dict (constructing pd.DataFrame(data) directly can be buggy here)
        data_frame = pd.DataFrame.from_dict(data)

        # an obsolete ticker can return no values
        if (not (data_frame.empty)):
            # if not(single):
            #     data_frame.columns = pd.MultiIndex.from_tuples(data, names=['field', 'ticker'])

            logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame
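# --- Illustrative sketch (not part of the original library source) ---
# pd.DataFrame.from_dict with (field, ticker) tuple keys is what the
# reference-data parser above relies on; the tuples can then be promoted to
# MultiIndex columns. Field and ticker names here are made up.
import pandas as pd

data = {('PX_LAST', 'EURUSD Curncy'): {0: 1.10, 1: 1.11},
        ('PX_LAST', 'GBPUSD Curncy'): {0: 1.30, 1: 1.31}}

df = pd.DataFrame.from_dict(data)
df.columns = pd.MultiIndex.from_tuples(df.columns, names=['field', 'ticker'])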
class BacktestRequest(MarketDataRequest): """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc Used by TradingModel and Backtest to construct backtested returns for trading strategies """ def __init__(self): super(MarketDataRequest, self).__init__() self.logger = LoggerManager().getLogger(__name__) self.__signal_name = None # output parameters for backtest (should we add returns statistics on legends, write CSVs with returns etc.) self.__plot_start = None self.__calc_stats = True self.__write_csv = False self.__plot_interim = False self.__include_benchmark = False self.__tech_params = TechParams() # default parameters for portfolio level vol adjustment self.__portfolio_vol_adjust = False self.__portfolio_vol_period_shift = 0 self.__portfolio_vol_rebalance_freq = None self.__portfolio_vol_resample_freq = None self.__portfolio_vol_resample_type = 'mean' self.__portfolio_vol_target = 0.1 # 10% vol target self.__portfolio_vol_max_leverage = None self.__portfolio_vol_periods = 20 self.__portfolio_vol_obs_in_year = 252 # default parameters for signal level vol adjustment self.__signal_vol_adjust = False self.__signal_vol_period_shift = 0 self.__signal_vol_rebalance_freq = None self.__signal_vol_resample_freq = None self.__signal_vol_resample_type = 'mean' self.__signal_vol_target = 0.1 # 10% vol target self.__signal_vol_max_leverage = None self.__signal_vol_periods = 20 self.__signal_vol_obs_in_year = 252 # portfolio notional size self.__portfolio_notional_size = None self.__portfolio_combination = None # parameters for maximum position limits (expressed as whole portfolio) self.__max_net_exposure = None self.__max_abs_exposure = None self.__position_clip_rebalance_freq = None self.__position_clip_resample_freq = None # by default apply max position criterion on last business day of month self.__position_clip_resample_type = 'mean' self.__position_clip_period_shift = 0 # take profit and stop loss parameters self.__take_profit = None self.__stop_loss = None # should we delay the signal? 
self.__signal_delay = 0 ##### properties for output of the backtest @property def plot_start(self): return self.__plot_start @plot_start.setter def plot_start(self, plot_start): self.__plot_start = plot_start @property def calc_stats(self): return self.__calc_stats @calc_stats.setter def calc_stats(self, calc_stats): self.__calc_stats = calc_stats @property def write_csv(self): return self.__write_csv @write_csv.setter def write_csv(self, write_csv): self.__write_csv = write_csv @property def plot_interim(self): return self.__plot_interim @plot_interim.setter def plot_interim(self, plot_interim): self.__plot_interim = plot_interim @property def include_benchmark(self): return self.__include_benchmark @include_benchmark.setter def include_benchmark(self, include_benchmark): self.__include_benchmark = include_benchmark ##### properties for portfolio level volatility adjustment @property def portfolio_vol_adjust(self): return self.__portfolio_vol_adjust @portfolio_vol_adjust.setter def portfolio_vol_adjust(self, portfolio_vol_adjust): self.__portfolio_vol_adjust = portfolio_vol_adjust @property def portfolio_vol_rebalance_freq(self): return self.__portfolio_vol_rebalance_freq @portfolio_vol_rebalance_freq.setter def portfolio_vol_rebalance_freq(self, portfolio_vol_rebalance_freq): self.__portfolio_vol_rebalance_freq = portfolio_vol_rebalance_freq @property def portfolio_vol_resample_type(self): return self.__portfolio_vol_resample_type @portfolio_vol_resample_type.setter def portfolio_vol_resample_type(self, portfolio_vol_resample_type): self.__portfolio_vol_resample_type = portfolio_vol_resample_type @property def portfolio_vol_resample_freq(self): return self.__portfolio_vol_resample_freq @portfolio_vol_resample_freq.setter def portfolio_vol_resample_freq(self, portfolio_vol_resample_freq): self.__portfolio_vol_resample_freq = portfolio_vol_resample_freq @property def portfolio_vol_period_shift(self): return self.__portfolio_vol_period_shift @portfolio_vol_period_shift.setter def portfolio_vol_period_shift(self, portfolio_vol_period_shift): self.__portfolio_vol_period_shift = portfolio_vol_period_shift @property def portfolio_vol_target(self): return self.__portfolio_vol_target @portfolio_vol_target.setter def portfolio_vol_target(self, portfolio_vol_target): self.__portfolio_vol_target = portfolio_vol_target @property def portfolio_vol_max_leverage(self): return self.__portfolio_vol_max_leverage @portfolio_vol_max_leverage.setter def portfolio_vol_max_leverage(self, portfolio_vol_max_leverage): self.__portfolio_vol_max_leverage = portfolio_vol_max_leverage @property def portfolio_vol_periods(self): return self.__portfolio_vol_periods @portfolio_vol_periods.setter def portfolio_vol_periods(self, portfolio_vol_periods): self.__portfolio_vol_periods = portfolio_vol_periods @property def portfolio_vol_obs_in_year(self): return self.__portfolio_vol_obs_in_year @portfolio_vol_obs_in_year.setter def portfolio_vol_obs_in_year(self, portfolio_vol_obs_in_year): self.__portfolio_vol_obs_in_year = portfolio_vol_obs_in_year ##### properties for signal level vol adjustment @property def signal_vol_adjust(self): return self.__signal_vol_adjust @signal_vol_adjust.setter def signal_vol_adjust(self, signal_vol_adjust): self.__signal_vol_adjust = signal_vol_adjust @property def signal_vol_rebalance_freq(self): return self.__signal_vol_rebalance_freq @signal_vol_rebalance_freq.setter def signal_vol_rebalance_freq(self, signal_vol_rebalance_freq): self.__signal_vol_rebalance_freq = signal_vol_rebalance_freq 
@property def signal_vol_resample_type(self): return self.__signal_vol_resample_type @signal_vol_resample_type.setter def signal_vol_resample_type(self, signal_vol_resample_type): self.__signal_vol_resample_type = signal_vol_resample_type @property def signal_vol_resample_freq(self): return self.__signal_vol_resample_freq @signal_vol_resample_freq.setter def signal_vol_resample_freq(self, signal_vol_resample_freq): self.__signal_vol_resample_freq = signal_vol_resample_freq @property def signal_vol_period_shift(self): return self.__signal_vol_period_shift @signal_vol_period_shift.setter def signal_vol_period_shift(self, signal_vol_period_shift): self.__signal_vol_period_shift = signal_vol_period_shift @property def signal_vol_target(self): return self.__signal_vol_target @signal_vol_target.setter def signal_vol_target(self, signal_vol_target): self.__signal_vol_target = signal_vol_target @property def signal_vol_max_leverage(self): return self.__signal_vol_max_leverage @signal_vol_max_leverage.setter def signal_vol_max_leverage(self, signal_vol_max_leverage): self.__signal_vol_max_leverage = signal_vol_max_leverage @property def signal_vol_periods(self): return self.__signal_vol_periods @signal_vol_periods.setter def signal_vol_periods(self, signal_vol_periods): self.__signal_vol_periods = signal_vol_periods @property def signal_vol_obs_in_year(self): return self.__signal_vol_obs_in_year @signal_vol_obs_in_year.setter def signal_vol_obs_in_year(self, signal_vol_obs_in_year): self.__signal_vol_obs_in_year = signal_vol_obs_in_year ##### portfolio notional size @property def portfolio_notional_size(self): return self.__portfolio_notional_size @portfolio_notional_size.setter def portfolio_notional_size(self, portfolio_notional_size): self.__portfolio_notional_size = float(portfolio_notional_size) ##### portfolio weights (sum, mean or dictionary of weights) @property def portfolio_combination(self): return self.__portfolio_combination @portfolio_combination.setter def portfolio_combination(self, portfolio_combination): self.__portfolio_combination = portfolio_combination ##### properties for maximum position constraints @property def max_net_exposure(self): return self.__max_net_exposure @max_net_exposure.setter def max_net_exposure(self, max_net_exposure): self.__max_net_exposure = max_net_exposure @property def max_abs_exposure(self): return self.__max_abs_exposure @max_abs_exposure.setter def max_abs_exposure(self, max_abs_exposure): self.__max_abs_exposure = max_abs_exposure @property def position_clip_rebalance_freq(self): return self.__position_clip_rebalance_freq @position_clip_rebalance_freq.setter def position_clip_rebalance_freq(self, position_clip_rebalance_freq): self.__position_clip_rebalance_freq = position_clip_rebalance_freq @property def position_clip_resample_type(self): return self.__position_clip_resample_type @position_clip_resample_type.setter def position_clip_resample_type(self, position_clip_resample_type): self.__position_clip_resample_type = position_clip_resample_type @property def position_clip_resample_freq(self): return self.__position_clip_resample_freq @position_clip_resample_freq.setter def position_clip_resample_freq(self, position_clip_resample_freq): self.__position_clip_resample_freq = position_clip_resample_freq @property def position_clip_period_shift(self): return self.__position_clip_period_shift @position_clip_period_shift.setter def position_clip_period_shift(self, position_clip_period_shift): self.__position_clip_period_shift = 
position_clip_period_shift

    ##### stop loss and take profit

    @property
    def stop_loss(self):
        return self.__stop_loss

    @stop_loss.setter
    def stop_loss(self, stop_loss):
        self.__stop_loss = stop_loss

    @property
    def take_profit(self):
        return self.__take_profit

    @take_profit.setter
    def take_profit(self, take_profit):
        self.__take_profit = take_profit

    ##### tech indicators and spot transaction costs (in bp)

    @property
    def tech_params(self):
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert from bp to a decimal fraction for half the bid/ask spread
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    #### FOR FUTURE USE ###

    @property
    def signal_name(self):
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if not asset in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if not instrument in valid_instrument:
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def signal_delay(self):
        return self.__signal_delay

    @signal_delay.setter
    def signal_delay(self, signal_delay):
        self.__signal_delay = signal_delay
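# --- Illustrative usage sketch (not part of the original library source) ---
# Typical way the BacktestRequest properties above are filled in before a
# backtest is run; the numerical values are only examples.
br = BacktestRequest()

br.start_date = '01 Jan 2010'
br.finish_date = '01 Jan 2020'

br.spot_tc_bp = 0.5               # stored internally as a half-spread decimal

br.portfolio_vol_adjust = True
br.portfolio_vol_target = 0.1     # 10% portfolio vol target
br.signal_vol_adjust = True
br.signal_vol_target = 0.1

br.take_profit = 3                # illustrative values only
br.stop_loss = -3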
if run_example == 1 or run_example == 0: # for backtest and loading data from finmarketpy.backtest import BacktestRequest, Backtest from findatapy.market import Market, MarketDataRequest, MarketDataGenerator from findatapy.util.fxconv import FXConv # for logging from findatapy.util.loggermanager import LoggerManager # for signal generation from finmarketpy.economics import TechIndicator, TechParams # for plotting from chartpy import Chart, Style logger = LoggerManager().getLogger(__name__) import datetime backtest = Backtest() br = BacktestRequest() fxconv = FXConv() # get all asset data br.start_date = "02 Jan 1990" br.finish_date = datetime.datetime.utcnow() br.spot_tc_bp = 2.5 # 2.5 bps bid/ask spread br.ann_factor = 252 # have vol target for each signal br.signal_vol_adjust = True
try: base_index = self.order.index(base) except ValueError: base_index = -1 try: terms_index = self.order.index(terms) except ValueError: terms_index = -1 if (base_index < 0 and terms_index > 0): return terms + base if (base_index > 0 and terms_index < 0): return base + terms elif (base_index > terms_index): return terms + base elif (terms_index > base_index): return base + terms return cross if __name__ == '__main__': logger = LoggerManager.getLogger(__name__) fxconv = FXConv() if True: logger.info(fxconv.g10_crosses())
class IOEngine(object): """Write and reads time series data to disk in various formats, CSV, HDF5 (fixed and table formats) and MongoDB/Arctic. Can be used to save down output of finmarketpy backtests and also to cache market data locally. Also supports BColz (but not currently stable). Planning to add other interfaces such as SQL etc. """ def __init__(self): self.logger = LoggerManager().getLogger(__name__) ### functions to handle Excel on disk def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False): """Writes Pandas data frame to disk in Excel format Parameters ---------- fname : str Excel filename to be written to sheet : str sheet in excel data_frame : DataFrame data frame to be written create_new : boolean to create a new Excel file """ if (create_new): writer = pandas.ExcelWriter(fname, engine='xlsxwriter') else: if os.path.isfile(fname): book = load_workbook(fname) writer = pandas.ExcelWriter(fname, engine='xlsxwriter') writer.book = book writer.sheets = dict((ws.title, ws) for ws in book.worksheets) else: writer = pandas.ExcelWriter(fname, engine='xlsxwriter') data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter') writer.save() writer.close() def write_time_series_to_excel_writer(self, writer, sheet, data_frame): """Writes Pandas data frame to disk in Excel format for a writer Parameters ---------- writer : ExcelWriter File handle to use for writing Excel file to disk sheet : str sheet in excel data_frame : DataFrame data frame to be written """ data_frame.to_excel(writer, sheet, engine='xlsxwriter') def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None, postfix='.close', intraday_tz='UTC'): """Reads Excel from disk into DataFrame Parameters ---------- f_name : str Excel file path to read freq : str Frequency of data to read (intraday/daily etc) cutoff : DateTime (optional) end date to read up to dateparse : str (optional) date parser to use postfix : str (optional) postfix to add to each columns intraday_tz : str timezone of file if uses intraday data Returns ------- DataFrame """ return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse, postfix=postfix, intraday_tz=intraday_tz, excel_sheet=excel_sheet) def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1', db_port='6379', timeout=10, username=None, password=None): if 'hdf5' in engine: engine = 'hdf5' if (engine == 'bcolz'): # convert invalid characters to substitutes (which Bcolz can't deal with) pass elif (engine == 'redis'): fname = os.path.basename(fname).replace('.', '_') try: r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout, socket_connect_timeout=timeout) if (fname == 'flush_all_keys'): r.flushall() else: # allow deletion of keys by pattern matching x = r.keys('*' + fname) if len(x) > 0: r.delete(x) # r.delete(fname) except Exception as e: self.logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e)) elif (engine == 'arctic'): from arctic import Arctic import pymongo socketTimeoutMS = 30 * 1000 fname = os.path.basename(fname).replace('.', '_') self.logger.info('Load MongoDB library: ' + fname) if username is not None and password is not None: c = pymongo.MongoClient( host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port), connect=False) # , username=username, password=password) else: c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False) store = Arctic(c, 
socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS, connectTimeoutMS=socketTimeoutMS) store.delete_library(fname) c.close() self.logger.info("Deleted MongoDB library: " + fname) elif (engine == 'hdf5'): h5_filename = self.get_h5_filename(fname) # delete the old copy try: os.remove(h5_filename) except: pass ### functions to handle HDF5 on disk def write_time_series_cache_to_disk(self, fname, data_frame, engine='hdf5_fixed', append_data=False, db_server=DataConstants().db_server, db_port=DataConstants().db_port, username=None, password=None, filter_out_matching=None, timeout=10, use_cache_compression=DataConstants().use_cache_compression): """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic Parmeters --------- fname : str path of file data_frame : DataFrame data frame to be written to disk engine : str 'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this 'hdf5_table' - use HDF5 table format, slower but can append to 'parquet' - use Parquet 'arctic' - use Arctic/MongoDB database 'redis' - use Redis append_data : bool False - write a fresh copy of data on disk each time True - append data to disk db_server : str Database server for arctic (default: '127.0.0.1') timeout : int Number of seconds to do timeout """ # default HDF5 format hdf5_format = 'fixed' if 'hdf5' in engine: hdf5_format = engine.split('_')[1] engine = 'hdf5' if (engine == 'bcolz'): # convert invalid characters to substitutes (which Bcolz can't deal with) data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars) data_frame.columns = ['A_' + x for x in data_frame.columns] data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns') bcolzpath = self.get_bcolz_filename(fname) shutil.rmtree(bcolzpath, ignore_errors=True) zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath) elif (engine == 'redis'): fname = os.path.basename(fname).replace('.', '_') try: r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout, socket_connect_timeout=timeout) if data_frame is not None: if isinstance(data_frame, pandas.DataFrame): # msgpack/blosc is deprecated # r.set(fname, data_frame.to_msgpack(compress='blosc')) # now uses pyarrow context = pa.default_serialization_context() ser = context.serialize(data_frame).to_buffer() if use_cache_compression: comp = pa.compress(ser, codec='lz4', asbytes=True) siz = len(ser) # siz = 3912 r.set('comp_' + str(siz) + '_' + fname, comp) else: r.set(fname, ser.to_pybytes()) self.logger.info("Pushed " + fname + " to Redis") else: self.logger.info("Object " + fname + " is empty, not pushed to Redis.") except Exception as e: self.logger.warning("Couldn't push " + fname + " to Redis: " + str(e)) elif (engine == 'arctic'): from arctic import Arctic import pymongo socketTimeoutMS = 30 * 1000 fname = os.path.basename(fname).replace('.', '_') self.logger.info('Load Arctic/MongoDB library: ' + fname) if username is not None and password is not None: c = pymongo.MongoClient( host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port), connect=False) # , username=username, password=password) else: c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS, connectTimeoutMS=socketTimeoutMS) database = None try: database = store[fname] except: pass if database is None: store.initialize_library(fname, audit=False) 
self.logger.info("Created MongoDB library: " + fname) else: self.logger.info("Got MongoDB library: " + fname) # Access the library library = store[fname] if ('intraday' in fname): data_frame = data_frame.astype('float32') if filter_out_matching is not None: cols = data_frame.columns new_cols = [] for col in cols: if filter_out_matching not in col: new_cols.append(col) data_frame = data_frame[new_cols] # problems with Arctic when writing timezone to disk sometimes, so strip data_frame = data_frame.copy().tz_localize(None) # can duplicate values if we have existing dates if append_data: library.append(fname, data_frame) else: library.write(fname, data_frame) c.close() self.logger.info("Written MongoDB library: " + fname) elif (engine == 'hdf5'): h5_filename = self.get_h5_filename(fname) # append data only works for HDF5 stored as tables (but this is much slower than fixed format) # removes duplicated entries at the end if append_data: store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="zlib", complevel=9) if ('intraday' in fname): data_frame = data_frame.astype('float32') # get last row which matches and remove everything after that (because append # function doesn't check for duplicated rows nrows = len(store['data'].index) last_point = data_frame.index[-1] i = nrows - 1 while (i > 0): read_index = store.select('data', start=i, stop=nrows).index[0] if (read_index <= last_point): break i = i - 1 # remove rows at the end, which are duplicates of the incoming time series store.remove(key='data', start=i, stop=nrows) store.put(key='data', value=data_frame, format=hdf5_format, append=True) store.close() else: h5_filename_temp = self.get_h5_filename(fname + ".temp") # delete the old copy try: os.remove(h5_filename_temp) except: pass store = pandas.HDFStore(h5_filename_temp, format=hdf5_format, complib="zlib", complevel=9) if ('intraday' in fname): data_frame = data_frame.astype('float32') store.put(key='data', value=data_frame, format=hdf5_format) store.close() # delete the old copy try: os.remove(h5_filename) except: pass # once written to disk rename os.rename(h5_filename_temp, h5_filename) self.logger.info("Written HDF5: " + fname) elif (engine == 'parquet'): if fname[-5:] != '.gzip': fname = fname + '.gzip' data_frame.to_parquet(fname, compression='gzip') self.logger.info("Written Parquet: " + fname) def get_h5_filename(self, fname): """Strips h5 off filename returning first portion of filename Parameters ---------- fname : str h5 filename to strip Returns ------- str """ if fname[-3:] == '.h5': return fname return fname + ".h5" def get_bcolz_filename(self, fname): """Strips bcolz off filename returning first portion of filename Parameters ---------- fname : str bcolz filename to strip Returns ------- str """ if fname[-6:] == '.bcolz': return fname return fname + ".bcolz" def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None): """Write a DataFrame to disk in as an R compatible HDF5 file. 
Parameters ---------- data_frame : DataFrame data frame to be written fname : str file path to be written fields : list(str) columns to be written """ fname_r = self.get_h5_filename(fname) self.logger.info("About to dump R binary HDF5 - " + fname_r) data_frame32 = data_frame.astype('float32') if fields is None: fields = data_frame32.columns.values # decompose date/time into individual fields (easier to pick up in R) data_frame32['Year'] = data_frame.index.year data_frame32['Month'] = data_frame.index.month data_frame32['Day'] = data_frame.index.day data_frame32['Hour'] = data_frame.index.hour data_frame32['Minute'] = data_frame.index.minute data_frame32['Second'] = data_frame.index.second data_frame32['Millisecond'] = data_frame.index.microsecond / 1000 data_frame32 = data_frame32[ ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields] cols = data_frame32.columns store_export = pandas.HDFStore(fname_r) store_export.put('df_for_r', data_frame32, data_columns=cols) store_export.close() def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None, db_server=DataConstants().db_server, db_port=DataConstants().db_port, username=None, password=None): """Reads time series cache from disk in either HDF5 or bcolz Parameters ---------- fname : str (or list) file to be read from engine : str (optional) 'hd5' - reads HDF5 files (default) 'arctic' - reads from Arctic/MongoDB database 'bcolz' = reads from bcolz file (not fully implemented) start_date : str/datetime (optional) Start date finish_date : str/datetime (optional) Finish data db_server : str IP address of MongdDB (default '127.0.0.1') Returns ------- DataFrame """ logger = LoggerManager.getLogger(__name__) data_frame_list = [] if not(isinstance(fname, list)): if '*' in fname: fname = glob.glob(fname) else: fname = [fname] for fname_single in fname: logger.debug("Reading " + fname_single + "..") if (engine == 'bcolz'): try: name = self.get_bcolz_filename(fname_single) zlens = bcolz.open(rootdir=name) data_frame = zlens.todataframe() data_frame.index = pandas.DatetimeIndex(data_frame['DTS_']) data_frame.index.name = 'Date' del data_frame['DTS_'] # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars) data_frame.columns = [x[2:] for x in data_frame.columns] except: data_frame = None elif (engine == 'redis'): fname_single = os.path.basename(fname_single).replace('.', '_') msg = None try: # for pyarrow context = pa.default_serialization_context() r = redis.StrictRedis(host=db_server, port=db_port, db=0) # is there a compressed key stored?) 
k = r.keys('comp_*_' + fname_single) # if so, then it means that we have stored it as a compressed object # if have more than 1 element, take the last (which will be the latest to be added) if (len(k) >= 1): k = k[-1].decode('utf-8') comp = r.get(k) siz = int(k.split('_')[1]) dec = pa.decompress(comp, codec='lz4', decompressed_size=siz) msg = context.deserialize(dec) else: msg = r.get(fname_single) # print(fname_single) if msg is not None: msg = context.deserialize(msg) # self.logger.warning("Key " + fname_single + " not in Redis cache?") except Exception as e: self.logger.info("Cache not existent for " + fname_single + " in Redis: " + str(e)) if msg is None: data_frame = None else: self.logger.info('Load Redis cache: ' + fname_single) data_frame = msg # pandas.read_msgpack(msg) elif (engine == 'arctic'): socketTimeoutMS = 2 * 1000 import pymongo from arctic import Arctic fname_single = os.path.basename(fname_single).replace('.', '_') self.logger.info('Load Arctic/MongoDB library: ' + fname_single) if username is not None and password is not None: c = pymongo.MongoClient( host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port), connect=False) # , username=username, password=password) else: c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False) store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS) # Access the library try: library = store[fname_single] if start_date is None and finish_date is None: item = library.read(fname_single) else: from arctic.date import DateRange item = library.read(fname_single, date_range=DateRange(start_date.replace(tzinfo=None), finish_date.replace(tzinfo=None))) c.close() self.logger.info('Read ' + fname_single) data_frame = item.data except Exception as e: self.logger.warning('Library may not exist or another error: ' + fname_single + ' & message is ' + str(e)) data_frame = None elif os.path.isfile(self.get_h5_filename(fname_single)): store = pandas.HDFStore(self.get_h5_filename(fname_single)) data_frame = store.select("data") if ('intraday' in fname_single): data_frame = data_frame.astype('float32') store.close() elif os.path.isfile(fname_single): data_frame = pandas.read_parquet(fname_single) data_frame_list.append(data_frame) if len(data_frame_list) == 1: return data_frame_list[0] return data_frame_list ### functions for CSV reading and writing def write_time_series_to_csv(self, csv_path, data_frame): data_frame.to_csv(csv_path) def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None, postfix='.close', intraday_tz='UTC', excel_sheet=None): """Reads CSV/Excel from disk into DataFrame Parameters ---------- f_name : str CSV/Excel file path to read freq : str Frequency of data to read (intraday/daily etc) cutoff : DateTime (optional) end date to read up to dateparse : str (optional) date parser to use postfix : str (optional) postfix to add to each columns intraday_tz : str (optional) timezone of file if uses intraday data excel_sheet : str (optional) Excel sheet to be read Returns ------- DataFrame """ if (freq == 'intraday'): if dateparse is None: dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2], x[11:13], x[14:16], x[17:19]])) elif dateparse is 'dukascopy': dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10], x[11:13], x[14:16], x[17:19]])) elif dateparse is 'c': # use C library for parsing dates, several hundred times quicker # requires compilation of library to install import 
ciso8601 dateparse = lambda x: ciso8601.parse_datetime(x) if excel_sheet is None: data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True, date_parser=dateparse) else: data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA']) data_frame = data_frame.astype('float32') data_frame.index.names = ['Date'] old_cols = data_frame.columns new_cols = [] # add '.close' to each column name for col in old_cols: new_cols.append(col + postfix) data_frame.columns = new_cols else: # daily data if 'events' in f_name: data_frame = pandas.read_csv(f_name) # very slow conversion data_frame = data_frame.convert_objects(convert_dates='coerce') else: if excel_sheet is None: try: data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"], date_parser=dateparse) except: data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["Date"], date_parser=dateparse) else: data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA']) # convert Date to Python datetime # datetime data_frame['Date1'] = data_frame.index # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S') # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]), # int(x[12:13]), int(x[15:16]), int(x[18:19]))) # data_frame.index = data_frame['Date1'] # data_frame.drop('Date1') # slower method: data_frame.index = pandas.to_datetime(data_frame.index) if (freq == 'intraday'): # assume time series are already in UTC and assign this (can specify other time zones) data_frame = data_frame.tz_localize(intraday_tz) # end cutoff date if cutoff is not None: if (isinstance(cutoff, str)): cutoff = parse(cutoff) data_frame = data_frame.loc[data_frame.index < cutoff] return data_frame def find_replace_chars(self, array, to_find, replace_with): for i in range(0, len(to_find)): array = [x.replace(to_find[i], replace_with[i]) for x in array] return array def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None): """Converts CSV file to HDF5 file Parameters ---------- f_name : str File name to be read category : str data category of file (used in HDF5 filename) freq : str intraday/daily frequency (used in HDF5 filename) cutoff : DateTime (optional) filter dates up to here dateparse : str date parser to use """ self.logger.info("About to read... " + f_name) data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse) category_f_name = self.create_cache_file_name(category) self.write_time_series_cache_to_disk(category_f_name, data_frame) def clean_csv_file(self, f_name): """Cleans up CSV file (removing empty characters) before writing back to disk Parameters ---------- f_name : str CSV file to be cleaned """ with codecs.open(f_name, 'rb', 'utf-8') as myfile: data = myfile.read() # clean file first if dirty if data.count('\x00'): self.logger.info('Cleaning CSV...') with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of: of.write(data.replace('\x00', '')) shutil.move(f_name + '.tmp', f_name) def create_cache_file_name(self, filename): return DataConstants().folder_time_series_data + "/" + filename # TODO refactor IOEngine so that each database is implemented in a subclass of DBEngine def get_engine(self, engine='hdf5_fixed'): pass
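# --- Illustrative usage sketch (not part of the original library source) ---
# Read a daily CSV into a DataFrame with the IOEngine class above, then write
# it out as an Excel sheet; 'fx_daily.csv' is a hypothetical file with a
# 'Date' index column, and xlsxwriter is assumed to be installed.
io_engine = IOEngine()

df = io_engine.read_csv_data_frame('fx_daily.csv', 'daily')
io_engine.write_time_series_to_excel('fx_daily.xlsx', 'fx', df, create_new=True)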
source + '.' + sourceticker] @staticmethod def convert_vendor_to_library_field(source, sourcefield): return ConfigManager._dict_time_series_fields_list_vendor_to_library[ source + '.' + sourcefield] @staticmethod def convert_library_to_vendor_field(source, field): return ConfigManager._dict_time_series_fields_list_library_to_vendor[ source + '.' + field] ## test function if __name__ == '__main__': logger = LoggerManager().getLogger(__name__) categories = ConfigManager().get_categories_from_fields() logger.info("Categories from fields list") print(categories) categories = ConfigManager().get_categories_from_tickers() logger.info("Categories from tickers list") print(categories) filter = 'events' categories_filtered = ConfigManager( ).get_categories_from_tickers_selective_filter(filter)
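# --- Illustrative sketch (not part of the original library source) ---
# The static converters above map between findatapy's internal field names and
# vendor-specific ones; the mappings come from the local time series config
# files, so the vendor strings below are only indicative.
vendor_field = ConfigManager.convert_library_to_vendor_field('bloomberg', 'close')
library_field = ConfigManager.convert_vendor_to_library_field('bloomberg', 'PX_LAST')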