class BacktestRequest(MarketDataRequest):

    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self): return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name): self.__signal_name = signal_name

    @property
    def tech_params(self): return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params): self.__tech_params = tech_params

    @property
    def spot_tc_bp(self): return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert basis points into a decimal proportion (bp / 10000), halved
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self): return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self): return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument
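# A minimal usage sketch (assumes TechParams exposes an 'sma_period'
# attribute, as in the library's trend-following examples):
#
#     br = BacktestRequest()
#     br.start_date = "01 Jan 2015"
#     br.finish_date = "01 Jan 2020"
#     br.spot_tc_bp = 0.5            # stored internally as 0.5 / 20000 = 2.5e-05
#     br.asset = "fx"
#     br.instrument = "spot"
#     br.signal_name = "SMA crossover"
#     br.tech_params.sma_period = 20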
def force_type_conversion(self, data_frame):
    constants = DataConstants()
    logger = LoggerManager().getLogger(__name__)

    if data_frame is not None:
        if not data_frame.empty:
            # Need to convert numerical and datetime columns separately
            # post pandas 0.23
            for c in data_frame.columns:
                is_date = False

                # Special case for ECO_RELEASE_DT / FIRST_REVISION_DATE
                if 'ECO_RELEASE_DT' in c or 'FIRST_REVISION_DATE' in c:
                    try:
                        temp_col = []  # data_frame[c].values

                        for i in range(0, len(data_frame[c].values)):
                            try:
                                temp_col.append(
                                    pd.to_datetime(
                                        str(int(data_frame[c].values[i])),
                                        format='%Y%m%d'))
                            except:
                                temp_col.append(np.datetime64('NaT'))

                        data_frame[c] = temp_col
                    except Exception as e:
                        logger.warning(
                            "Couldn't convert " + str(c)
                            + " to date.. was this column empty? " + str(e))
                else:
                    # Only convert those Bloomberg reference fields to
                    # dates which have been listed explicitly
                    for d in constants.always_date_columns:
                        if d in c:
                            try:
                                data_frame[c] = pd.to_datetime(
                                    data_frame[c], errors='coerce')

                                is_date = True
                                break
                            except:
                                pass

                    # Otherwise this is not a date field so attempt to
                    # convert into numbers
                    if not is_date:
                        try:
                            data_frame[c] = pd.to_numeric(data_frame[c],
                                                          errors='ignore')
                        except:
                            pass

    logger.debug("Returning converted dataframe...")

    return data_frame
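# Illustrative sketch of the conversion behaviour (column values hypothetical;
# assumes pandas/numpy are imported as pd/np and the method lives on the
# data vendor object, here called 'vendor'):
#
#     df = pd.DataFrame({"ECO_RELEASE_DT": ["20200131", "bad"],
#                        "PX_LAST": ["1.10", "1.12"]})
#     df = vendor.force_type_conversion(df)
#     # ECO_RELEASE_DT -> datetime64[ns] (unparseable entries become NaT)
#     # PX_LAST        -> float64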
def _get_full_cal(self, cal):
    holidays_list = []

    # Calendars which have been hardcoded in the parquet file (which users
    # may also edit)
    if len(cal) == 6:
        # Eg. EURUSD (load EUR and USD calendars and combine the holidays)
        holidays_list.append(
            [self._get_full_cal(cal[0:3]), self._get_full_cal(cal[3:6])])
    elif len(cal) == 9:
        holidays_list.append([
            self._get_full_cal(cal[0:3]),
            self._get_full_cal(cal[3:6]),
            self._get_full_cal(cal[6:9])
        ])
    else:
        if cal == 'FX' or cal == 'NYX':
            # Filter for Christmas & New Year's Day
            for i in range(1999, 2025):
                holidays_list.append(pd.Timestamp(str(i) + "-12-25"))
                holidays_list.append(pd.Timestamp(str(i) + "-01-01"))
        elif cal == 'NYD' or cal == 'NEWYEARSDAY':
            # Filter for New Year's Day
            for i in range(1999, 2025):
                holidays_list.append(pd.Timestamp(str(i) + "-01-01"))
        elif cal == 'WDY' or cal == 'WEEKDAY':
            bday = CustomBusinessDay(weekmask='Sat Sun')

            holidays_list.append([
                x for x in pd.date_range(
                    '01 Jan 1999', '31 Dec 2025', freq=bday)
            ])
        elif cal == 'WKD':
            pass
            # holidays_list.append()
        else:
            label = cal + ".holiday-dates"

            try:
                holidays_list = self._holiday_df[label].dropna().tolist()
            except:
                logger = LoggerManager().getLogger(__name__)
                logger.warning(cal + " holiday calendar not found.")

    return holidays_list
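# Illustrative sketch: a six-character code such as "EURUSD" is split into
# its two currency calendars and their holidays are combined; shorter codes
# map to the hardcoded cases above (class name assumed to be the library's
# Calendar-style class):
#
#     cal = Calendar()
#     eurusd_holidays = cal._get_full_cal("EURUSD")  # [[EUR holidays, USD holidays]]
#     nyd_only = cal._get_full_cal("NYD")            # New Year's Day, 1999-2024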
class IOEngine(object):
    """Writes and reads time series data to disk in various formats, including
    CSV, HDF5 (fixed and table formats) and MongoDB/Arctic.

    Can be used to save down output of finmarketpy backtests and also to
    cache market data locally. Also supports bcolz (but not currently stable).
    Planning to add other interfaces such as SQL etc.
    """

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk

    def write_time_series_to_excel(self, fname, sheet, data_frame,
                                   create_new=False):
        """Writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in Excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if create_new:
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """Writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in Excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None,
                              dateparse=None, postfix='.close',
                              intraday_tz='UTC'):
        """Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str
            timezone of file if it uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff=cutoff,
                                        dateparse=dateparse, postfix=postfix,
                                        intraday_tz=intraday_tz,
                                        excel_sheet=excel_sheet)

    def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed',
                                         db_server='127.0.0.1',
                                         db_port='6379', timeout=10,
                                         username=None, password=None):

        if 'hdf5' in engine:
            engine = 'hdf5'

        if engine == 'bcolz':
            # convert invalid characters to substitutes (which bcolz can't deal with)
            pass
        elif engine == 'redis':
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                      socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if fname == 'flush_all_keys':
                    r.flushall()
                else:
                    # allow deletion of keys by pattern matching
                    if "*" in fname:
                        x = r.keys(fname)

                        if len(x) > 0:
                            r.delete(*x)

                    r.delete(fname)

            except Exception as e:
                self.logger.warning("Cannot delete non-existent key " + fname
                                    + " in Redis: " + str(e))

        elif engine == 'arctic':
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@"
                         + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(
                    host="mongodb://" + str(db_server) + ":" + str(db_port),
                    connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            store.delete_library(fname)

            c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif engine == 'hdf5':
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

    ### functions to handle HDF5 on disk

    def write_time_series_cache_to_disk(self, fname, data_frame,
                                        engine='hdf5_fixed',
                                        append_data=False,
                                        db_server=DataConstants().db_server,
                                        db_port=DataConstants().db_port,
                                        username=None, password=None,
                                        filter_out_matching=None, timeout=10):
        """Writes Pandas data frame to disk as HDF5 format or bcolz format or
        in Arctic

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot
                append to this
            'hdf5_table' - use HDF5 table format, slower but can append to
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for Arctic (default: '127.0.0.1')
        timeout : int
            Number of seconds to do timeout
        """

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if engine == 'bcolz':
            # convert invalid characters to substitutes (which bcolz can't deal with)
            data_frame.columns = self.find_replace_chars(
                data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index,
                                                    unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif engine == 'redis':
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                      socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if isinstance(data_frame, pandas.DataFrame):
                    r.set(fname, data_frame.to_msgpack(compress='blosc'))

                self.logger.info("Pushed " + fname + " to Redis")
            except Exception as e:
                self.logger.warning("Couldn't push " + fname + " to Redis: "
                                    + str(e))

        elif engine == 'arctic':
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@"
                         + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(
                    host="mongodb://" + str(db_server) + ":" + str(db_port),
                    connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                self.logger.info("Created MongoDB library: " + fname)
            else:
                self.logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if 'intraday' in fname:
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            self.logger.info("Written MongoDB library: " + fname)

        elif engine == 'hdf5':
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is
            # much slower than fixed format); removes duplicated entries at
            # the end
            if append_data:
                store = pandas.HDFStore(h5_filename, format=hdf5_format,
                                        complib="blosc", complevel=9)

                if 'intraday' in fname:
                    data_frame = data_frame.astype('float32')

                # get last row which matches and remove everything after that
                # (because the append function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while i > 0:
                    read_index = store.select('data', start=i,
                                              stop=nrows).index[0]

                    if read_index <= last_point:
                        break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming
                # time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data', value=data_frame, format=hdf5_format,
                          append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp, format=hdf5_format,
                                        complib="blosc", complevel=9)

                if 'intraday' in fname:
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

            self.logger.info("Written HDF5: " + fname)

        elif engine == 'parquet':
            if fname[-5:] != '.gzip':
                fname = fname + '.gzip'

            data_frame.to_parquet(fname, compression='gzip')

            self.logger.info("Written Parquet: " + fname)

    def get_h5_filename(self, fname):
        """Adds an .h5 extension to the filename if it is not already there

        Parameters
        ----------
        fname : str
            filename to check

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """Adds a .bcolz extension to the filename if it is not already there

        Parameters
        ----------
        fname : str
            filename to check

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """Writes a DataFrame to disk as an R-compatible HDF5 file.

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)

        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.values

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond']
            + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname, engine='hdf5',
                                         start_date=None, finish_date=None,
                                         db_server=DataConstants().db_server,
                                         db_port=DataConstants().db_port,
                                         username=None, password=None):
        """Reads time series cache from disk, in HDF5, bcolz, Redis,
        Arctic/MongoDB or Parquet format

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hdf5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
        start_date : str/datetime (optional)
            Start date
        finish_date : str/datetime (optional)
            Finish date
        db_server : str
            IP address of MongoDB (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager().getLogger(__name__)

        data_frame_list = []

        if not isinstance(fname, list):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            if engine == 'bcolz':
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which bcolz can't deal with)
                    # back to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(
                        data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif engine == 'redis':
                import redis

                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                    msg = r.get(fname_single)
                except:
                    self.logger.info("Cache not existent for " + fname_single
                                     + " in Redis")

                if msg is None:
                    data_frame = None
                else:
                    self.logger.info('Load Redis cache: ' + fname_single)

                    data_frame = pandas.read_msgpack(msg)

            elif engine == 'arctic':
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                self.logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@"
                             + str(db_server) + ":" + str(db_port),
                        connect=False)  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(
                        host="mongodb://" + str(db_server) + ":"
                             + str(db_port),
                        connect=False)

                store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                               serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)
                    else:
                        from arctic.date import DateRange
                        item = library.read(
                            fname_single,
                            date_range=DateRange(start_date, finish_date))

                    c.close()

                    self.logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    self.logger.warning('Library does not exist: '
                                        + fname_single + ' & message is '
                                        + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if 'intraday' in fname_single:
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list

    ### functions for CSV reading and writing

    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None,
                            postfix='.close', intraday_tz='UTC',
                            excel_sheet=None):
        """Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str (optional)
            timezone of file if it uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if freq == 'intraday':

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(
                    *map(int, [x[6:10], x[3:5], x[0:2], x[11:13], x[14:16],
                               x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(
                    *map(int, [x[0:4], x[5:7], x[8:10], x[11:13], x[14:16],
                               x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times
                # quicker; requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col=0,
                                             parse_dates=True,
                                             date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet,
                                               index_col=0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:
                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates='coerce')
            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name, index_col=0,
                                                     parse_dates=["DATE"],
                                                     date_parser=dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name, index_col=0,
                                                     parse_dates=["Date"],
                                                     date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet,
                                                   index_col=0,
                                                   na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                         int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if freq == 'intraday':
            # assume time series are already in UTC and assign this (can
            # specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if isinstance(cutoff, str):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):

        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None,
                               dateparse=None):
        """Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff,
                                              dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """Cleans up CSV file (removing empty characters) before writing back
        to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename

    # TODO refactor IOEngine so that each database is implemented in a
    # subclass of DBEngine
    def get_engine(self, engine='hdf5_fixed'):
        pass
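# A minimal usage sketch of the cache round trip (file name illustrative;
# assumes 'df' is a date-indexed DataFrame):
#
#     io = IOEngine()
#     io.write_time_series_cache_to_disk("backtest.fx.daily.EURUSD", df,
#                                        engine="parquet")
#     # engine='parquet' appends '.gzip', so read back with that suffix
#     df_cached = io.read_time_series_cache_from_disk(
#         "backtest.fx.daily.EURUSD.gzip")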
class MarketDataRequest(object):
    """Provides parameters for requesting market data.

    Includes parameters to define the ticker we'd like to fetch, the start
    and finish dates for our request, as well as the various fields we would
    like and also the frequency of the data.
    """

    # properties
    #
    # data_source eg. bbg, yahoo, quandl
    # start_date
    # finish_date
    # tickers (can be list) eg. EURUSD
    # category (eg. fx, equities, fixed_income, cal_event, fundamental)
    # freq_mult (eg. 1)
    # freq (tick, intraday or daily)
    # gran_freq (minute, daily, hourly, daily, weekly, monthly, yearly)
    # fields (can be list)
    # vendor_tickers (optional)
    # vendor_fields (optional)
    # cache_algo (eg. internet, disk, memory) - internet will forcibly
    #     download from the internet
    # abstract_curve (optional)
    # environment (eg. prod, backtest) - old data is saved with prod,
    #     backtest will overwrite the last data point
    # overrides (optional) - if you need to specify any data overrides
    #     (eg. for BBG)

    def generate_key(self):
        """Generates a key to describe this MarketDataRequest object, which
        can be used in a cache, as a hash-style key

        Returns
        -------
        str
            Key to describe this MarketDataRequest
        """
        from findatapy.market.ioengine import SpeedCache

        if self.freq == 'daily':
            ticker = None
        else:
            ticker = self.tickers[0]

        self.__category_key = self.create_category_key(self, ticker=ticker)

        return SpeedCache().generate_key(
            self,
            ['logger', '_MarketDataRequest__abstract_curve',
             '_MarketDataRequest__cache_algo',
             '_MarketDataRequest__overrides'])

    def __init__(self, data_source=None,
                 start_date='year', finish_date=datetime.datetime.utcnow(),
                 tickers=None, category=None, freq_mult=1, freq="daily",
                 gran_freq=None, cut="NYC",
                 fields=['close'], cache_algo="internet_load_return",
                 vendor_tickers=None, vendor_fields=None,
                 environment="backtest", trade_side='trade',
                 expiry_date=None, md_request=None, abstract_curve=None,
                 overrides={}):

        self.logger = LoggerManager().getLogger(__name__)

        # can deep copy MarketDataRequest (use a lock, so can be used with
        # threading when downloading time series)
        if md_request is not None:
            import threading
            lock = threading.Lock()

            with lock:
                import copy

                self.freq_mult = copy.deepcopy(md_request.freq_mult)

                # define frequency of data
                self.gran_freq = copy.deepcopy(md_request.gran_freq)
                self.freq_mult = copy.deepcopy(md_request.freq_mult)
                self.freq = copy.deepcopy(md_request.freq)

                # data source, start and finish date
                self.data_source = copy.deepcopy(md_request.data_source)
                self.start_date = copy.deepcopy(md_request.start_date)
                self.finish_date = copy.deepcopy(md_request.finish_date)

                self.category = copy.deepcopy(md_request.category)  # special predefined categories

                self.cut = copy.deepcopy(md_request.cut)  # closing time of the data (eg. NYC, LDN, TOK etc)
                self.fields = copy.deepcopy(md_request.fields)  # fields, eg. close, high, low, open
                self.cache_algo = copy.deepcopy(md_request.cache_algo)  # internet_load_return (cache_algo_return is for future use)
                self.vendor_tickers = copy.deepcopy(md_request.vendor_tickers)  # define vendor tickers
                self.vendor_fields = copy.deepcopy(md_request.vendor_fields)  # define vendor fields
                self.environment = copy.deepcopy(md_request.environment)  # backtest environment only supported at present
                self.trade_side = copy.deepcopy(md_request.trade_side)
                self.expiry_date = copy.deepcopy(md_request.expiry_date)
                # self.abstract_curve = copy.deepcopy(md_request.abstract_curve)
                self.overrides = copy.deepcopy(md_request.overrides)

                # need this after category, in case we have a wildcard ticker
                self.tickers = copy.deepcopy(md_request.tickers)
        else:
            self.freq_mult = freq_mult

            # define frequency of data
            self.gran_freq = gran_freq
            self.freq_mult = freq_mult
            self.freq = freq

            # data source, start and finish date
            self.data_source = data_source
            self.start_date = start_date
            self.finish_date = finish_date

            self.category = category  # special predefined categories

            self.cut = cut  # closing time of the data (eg. NYC, LDN, TOK etc)
            self.fields = fields  # fields, eg. close, high, low, open
            self.cache_algo = cache_algo  # internet_load_return (cache_algo_return is for future use)
            self.vendor_tickers = vendor_tickers  # define vendor tickers
            self.vendor_fields = vendor_fields  # define vendor fields
            self.environment = environment  # backtest environment only supported at present
            self.trade_side = trade_side
            self.expiry_date = expiry_date
            self.abstract_curve = abstract_curve
            self.overrides = overrides

            self.tickers = tickers

    def create_category_key(self, market_data_request, ticker=None):
        """Returns a category key for the associated MarketDataRequest, which
        can be used to create filenames (or as part of a storage key in a
        cache)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None:
            cut = market_data_request.cut

        if ticker is not None:
            key = str(environment) + "." + str(category) + '.' \
                  + str(source) + '.' + str(freq) + '.' + str(cut) \
                  + '.' + str(ticker)
        else:
            key = str(environment) + "." + str(category) + '.' \
                  + str(source) + '.' + str(freq) + '.' + str(cut)

        return key

    @property
    def data_source(self): return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        try:
            valid_data_source = ['ats', 'bloomberg', 'dukascopy', 'fred',
                                 'gain', 'google', 'quandl', 'yahoo']

            if data_source not in valid_data_source:
                self.logger.warning(data_source
                                    + " is not a defined data source.")
        except:
            pass

        self.__data_source = data_source

    @property
    def category(self): return self.__category

    @category.setter
    def category(self, category): self.__category = category

    @property
    def tickers(self): return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        if tickers is not None:
            if not isinstance(tickers, list):
                tickers = [tickers]

        config = None

        new_tickers = []

        if tickers is not None:
            for tick in tickers:
                if '*' in tick:
                    start = ''

                    if tick[-1] == "*" and tick[0] != "*":
                        start = "^"

                    tick = start + "(" + tick.replace('*', '') + ")"

                    if config is None:
                        from findatapy.util import ConfigManager
                        config = ConfigManager().get_instance()

                    new_tickers.append(
                        config.get_filtered_tickers_list_for_category(
                            self.__category, self.__data_source, self.__freq,
                            self.__cut, tick))
                else:
                    new_tickers.append(tick)

            new_tickers = self._flatten_list(new_tickers)

            self.__tickers = new_tickers
        else:
            self.__tickers = tickers

    @property
    def fields(self): return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if field_entry not in valid_fields:
                i = 0
                # self.logger.warning(field_entry + " is not a valid field.")

        # add error checking

        self.__fields = fields

    @property
    def vendor_tickers(self): return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        if vendor_tickers is not None:
            if not isinstance(vendor_tickers, list):
                vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self): return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if vendor_fields is not None:
            if not isinstance(vendor_fields, list):
                vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self): return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = ['tick', 'second', 'minute', 'intraday', 'hourly',
                      'daily', 'weekly', 'monthly', 'quarterly', 'annually']

        if freq not in valid_freq:
            self.logger.warning(freq + " is not a defined frequency")

        self.__freq = freq

    @property
    def gran_freq(self): return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        try:
            gran_freq = gran_freq.lower()

            valid_gran_freq = ['tick', 'second', 'minute', 'hourly',
                               'pseudodaily', 'daily', 'weekly', 'monthly',
                               'quarterly', 'annually']

            if gran_freq not in valid_gran_freq:
                self.logger.warning(gran_freq
                                    + " is not a defined frequency")

            if gran_freq in ['minute', 'hourly']:
                self.__freq = 'intraday'
            elif gran_freq in ['tick', 'second']:
                self.__freq = 'tick'
            else:
                self.__freq = 'daily'
        except:
            pass

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self): return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult): self.__freq_mult = freq_mult

    @property
    def start_date(self): return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self): return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self): return self.__cut

    @cut.setter
    def cut(self, cut): self.__cut = cut

    def date_parser(self, date):
        if isinstance(date, str):

            date1 = datetime.datetime.utcnow()

            if date == 'midnight':
                date1 = datetime.datetime(date1.year, date1.month, date1.day,
                                          0, 0, 0)
            elif date == 'decade':
                date1 = date1 - timedelta(days=360 * 10)
            elif date == 'year':
                date1 = date1 - timedelta(days=360)
            elif date == 'month':
                date1 = date1 - timedelta(days=30)
            elif date == 'week':
                date1 = date1 - timedelta(days=7)
            elif date == 'day':
                date1 = date1 - timedelta(days=1)
            elif date == 'hour':
                date1 = date1 - timedelta(hours=1)
            else:
                # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M'
                try:
                    date1 = datetime.datetime.strptime(date, '%b %d %Y %H:%M')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0

                # format expected '1 Jun 2005 01:33', '%d %b %Y %H:%M'
                try:
                    date1 = datetime.datetime.strptime(date, '%d %b %Y %H:%M')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0

                try:
                    date1 = datetime.datetime.strptime(date, '%b %d %Y')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0

                try:
                    date1 = datetime.datetime.strptime(date, '%d %b %Y')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0
        else:
            import pandas
            date1 = pandas.Timestamp(date)

        return date1

    @property
    def cache_algo(self): return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = ['internet_load', 'internet_load_return',
                            'cache_algo', 'cache_algo_return']

        if cache_algo not in valid_cache_algo:
            self.logger.warning(cache_algo
                                + " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self): return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment = ['prod', 'backtest']

        if environment not in valid_environment:
            self.logger.warning(environment
                                + " is not a defined environment.")

        self.__environment = environment

    @property
    def trade_side(self): return self.__trade_side

    @trade_side.setter
    def trade_side(self, trade_side):
        trade_side = trade_side.lower()

        valid_trade_side = ['trade', 'bid', 'ask']

        if trade_side not in valid_trade_side:
            self.logger.warning(trade_side
                                + " is not a defined trade side.")

        self.__trade_side = trade_side

    @property
    def expiry_date(self): return self.__expiry_date

    @expiry_date.setter
    def expiry_date(self, expiry_date):
        self.__expiry_date = self.date_parser(expiry_date)

    @property
    def abstract_curve(self): return self.__abstract_curve

    @abstract_curve.setter
    def abstract_curve(self, abstract_curve):
        if abstract_curve is not None:
            self.__abstract_curve_key = abstract_curve.generate_key()
        else:
            self.__abstract_curve_key = None

        self.__abstract_curve = abstract_curve

    @property
    def overrides(self): return self.__overrides

    @overrides.setter
    def overrides(self, overrides): self.__overrides = overrides

    def _flatten_list(self, list_of_lists):
        """Flattens list, particularly useful for combining baskets

        Parameters
        ----------
        list_of_lists : str (list)
            List to be flattened

        Returns
        -------
        list
        """
        result = []

        for i in list_of_lists:
            # Only append if i is a basestring (superclass of string)
            if isinstance(i, str):
                result.append(i)
            # Otherwise call this function recursively
            else:
                result.extend(self._flatten_list(i))

        return result
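# A minimal usage sketch (ticker and vendor ticker values are illustrative
# only):
#
#     md_request = MarketDataRequest(
#         start_date="01 Jan 2019", finish_date="01 Jun 2019",
#         data_source="quandl", tickers=["EURUSD"],
#         vendor_tickers=["ECB/EURUSD"], fields=["close"], freq="daily")
#
#     key = md_request.generate_key()   # usable as a cache key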
class BacktestRequest(MarketDataRequest):
    """Contains parameters necessary to define a backtest, including start
    date, finish date, transaction cost, etc.

    Used by TradingModel and Backtest to construct backtested returns for
    trading strategies
    """

    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None

        # output parameters for backtest (should we add returns statistics on
        # legends, write CSVs with returns etc.)
        self.__plot_start = None
        self.__calc_stats = True
        self.__write_csv = False
        self.__plot_interim = False
        self.__include_benchmark = False

        self.__tech_params = TechParams()

        # default parameters for portfolio level vol adjustment
        self.__portfolio_vol_adjust = False
        self.__portfolio_vol_period_shift = 0
        self.__portfolio_vol_rebalance_freq = None
        self.__portfolio_vol_resample_freq = None
        self.__portfolio_vol_resample_type = 'mean'
        self.__portfolio_vol_target = 0.1  # 10% vol target
        self.__portfolio_vol_max_leverage = None
        self.__portfolio_vol_periods = 20
        self.__portfolio_vol_obs_in_year = 252

        # default parameters for signal level vol adjustment
        self.__signal_vol_adjust = False
        self.__signal_vol_period_shift = 0
        self.__signal_vol_rebalance_freq = None
        self.__signal_vol_resample_freq = None
        self.__signal_vol_resample_type = 'mean'
        self.__signal_vol_target = 0.1  # 10% vol target
        self.__signal_vol_max_leverage = None
        self.__signal_vol_periods = 20
        self.__signal_vol_obs_in_year = 252

        # portfolio notional size
        self.__portfolio_notional_size = None
        self.__portfolio_combination = None

        # parameters for maximum position limits (expressed as whole portfolio)
        self.__max_net_exposure = None
        self.__max_abs_exposure = None

        self.__position_clip_rebalance_freq = None
        # by default apply max position criterion on last business day of month
        self.__position_clip_resample_freq = None
        self.__position_clip_resample_type = 'mean'
        self.__position_clip_period_shift = 0

        # take profit and stop loss parameters
        self.__take_profit = None
        self.__stop_loss = None

        # should we delay the signal?
        self.__signal_delay = 0

    ##### properties for output of the backtest

    @property
    def plot_start(self): return self.__plot_start

    @plot_start.setter
    def plot_start(self, plot_start): self.__plot_start = plot_start

    @property
    def calc_stats(self): return self.__calc_stats

    @calc_stats.setter
    def calc_stats(self, calc_stats): self.__calc_stats = calc_stats

    @property
    def write_csv(self): return self.__write_csv

    @write_csv.setter
    def write_csv(self, write_csv): self.__write_csv = write_csv

    @property
    def plot_interim(self): return self.__plot_interim

    @plot_interim.setter
    def plot_interim(self, plot_interim): self.__plot_interim = plot_interim

    @property
    def include_benchmark(self): return self.__include_benchmark

    @include_benchmark.setter
    def include_benchmark(self, include_benchmark):
        self.__include_benchmark = include_benchmark

    ##### properties for portfolio level volatility adjustment

    @property
    def portfolio_vol_adjust(self): return self.__portfolio_vol_adjust

    @portfolio_vol_adjust.setter
    def portfolio_vol_adjust(self, portfolio_vol_adjust):
        self.__portfolio_vol_adjust = portfolio_vol_adjust

    @property
    def portfolio_vol_rebalance_freq(self):
        return self.__portfolio_vol_rebalance_freq

    @portfolio_vol_rebalance_freq.setter
    def portfolio_vol_rebalance_freq(self, portfolio_vol_rebalance_freq):
        self.__portfolio_vol_rebalance_freq = portfolio_vol_rebalance_freq

    @property
    def portfolio_vol_resample_type(self):
        return self.__portfolio_vol_resample_type

    @portfolio_vol_resample_type.setter
    def portfolio_vol_resample_type(self, portfolio_vol_resample_type):
        self.__portfolio_vol_resample_type = portfolio_vol_resample_type

    @property
    def portfolio_vol_resample_freq(self):
        return self.__portfolio_vol_resample_freq

    @portfolio_vol_resample_freq.setter
    def portfolio_vol_resample_freq(self, portfolio_vol_resample_freq):
        self.__portfolio_vol_resample_freq = portfolio_vol_resample_freq

    @property
    def portfolio_vol_period_shift(self):
        return self.__portfolio_vol_period_shift

    @portfolio_vol_period_shift.setter
    def portfolio_vol_period_shift(self, portfolio_vol_period_shift):
        self.__portfolio_vol_period_shift = portfolio_vol_period_shift

    @property
    def portfolio_vol_target(self): return self.__portfolio_vol_target

    @portfolio_vol_target.setter
    def portfolio_vol_target(self, portfolio_vol_target):
        self.__portfolio_vol_target = portfolio_vol_target

    @property
    def portfolio_vol_max_leverage(self):
        return self.__portfolio_vol_max_leverage

    @portfolio_vol_max_leverage.setter
    def portfolio_vol_max_leverage(self, portfolio_vol_max_leverage):
        self.__portfolio_vol_max_leverage = portfolio_vol_max_leverage

    @property
    def portfolio_vol_periods(self): return self.__portfolio_vol_periods

    @portfolio_vol_periods.setter
    def portfolio_vol_periods(self, portfolio_vol_periods):
        self.__portfolio_vol_periods = portfolio_vol_periods

    @property
    def portfolio_vol_obs_in_year(self):
        return self.__portfolio_vol_obs_in_year

    @portfolio_vol_obs_in_year.setter
    def portfolio_vol_obs_in_year(self, portfolio_vol_obs_in_year):
        self.__portfolio_vol_obs_in_year = portfolio_vol_obs_in_year

    ##### properties for signal level vol adjustment

    @property
    def signal_vol_adjust(self): return self.__signal_vol_adjust

    @signal_vol_adjust.setter
    def signal_vol_adjust(self, signal_vol_adjust):
        self.__signal_vol_adjust = signal_vol_adjust

    @property
    def signal_vol_rebalance_freq(self):
        return self.__signal_vol_rebalance_freq

    @signal_vol_rebalance_freq.setter
    def signal_vol_rebalance_freq(self, signal_vol_rebalance_freq):
        self.__signal_vol_rebalance_freq = signal_vol_rebalance_freq

    @property
    def signal_vol_resample_type(self):
        return self.__signal_vol_resample_type

    @signal_vol_resample_type.setter
    def signal_vol_resample_type(self, signal_vol_resample_type):
        self.__signal_vol_resample_type = signal_vol_resample_type

    @property
    def signal_vol_resample_freq(self):
        return self.__signal_vol_resample_freq

    @signal_vol_resample_freq.setter
    def signal_vol_resample_freq(self, signal_vol_resample_freq):
        self.__signal_vol_resample_freq = signal_vol_resample_freq

    @property
    def signal_vol_period_shift(self): return self.__signal_vol_period_shift

    @signal_vol_period_shift.setter
    def signal_vol_period_shift(self, signal_vol_period_shift):
        self.__signal_vol_period_shift = signal_vol_period_shift

    @property
    def signal_vol_target(self): return self.__signal_vol_target

    @signal_vol_target.setter
    def signal_vol_target(self, signal_vol_target):
        self.__signal_vol_target = signal_vol_target

    @property
    def signal_vol_max_leverage(self): return self.__signal_vol_max_leverage

    @signal_vol_max_leverage.setter
    def signal_vol_max_leverage(self, signal_vol_max_leverage):
        self.__signal_vol_max_leverage = signal_vol_max_leverage

    @property
    def signal_vol_periods(self): return self.__signal_vol_periods

    @signal_vol_periods.setter
    def signal_vol_periods(self, signal_vol_periods):
        self.__signal_vol_periods = signal_vol_periods

    @property
    def signal_vol_obs_in_year(self): return self.__signal_vol_obs_in_year

    @signal_vol_obs_in_year.setter
    def signal_vol_obs_in_year(self, signal_vol_obs_in_year):
        self.__signal_vol_obs_in_year = signal_vol_obs_in_year

    ##### portfolio notional size

    @property
    def portfolio_notional_size(self): return self.__portfolio_notional_size

    @portfolio_notional_size.setter
    def portfolio_notional_size(self, portfolio_notional_size):
        self.__portfolio_notional_size = float(portfolio_notional_size)

    ##### portfolio weights (sum, mean or dictionary of weights)

    @property
    def portfolio_combination(self): return self.__portfolio_combination

    @portfolio_combination.setter
    def portfolio_combination(self, portfolio_combination):
        self.__portfolio_combination = portfolio_combination

    ##### properties for maximum position constraints

    @property
    def max_net_exposure(self): return self.__max_net_exposure

    @max_net_exposure.setter
    def max_net_exposure(self, max_net_exposure):
        self.__max_net_exposure = max_net_exposure

    @property
    def max_abs_exposure(self): return self.__max_abs_exposure

    @max_abs_exposure.setter
    def max_abs_exposure(self, max_abs_exposure):
        self.__max_abs_exposure = max_abs_exposure

    @property
    def position_clip_rebalance_freq(self):
        return self.__position_clip_rebalance_freq

    @position_clip_rebalance_freq.setter
    def position_clip_rebalance_freq(self, position_clip_rebalance_freq):
        self.__position_clip_rebalance_freq = position_clip_rebalance_freq

    @property
    def position_clip_resample_type(self):
        return self.__position_clip_resample_type

    @position_clip_resample_type.setter
    def position_clip_resample_type(self, position_clip_resample_type):
        self.__position_clip_resample_type = position_clip_resample_type

    @property
    def position_clip_resample_freq(self):
        return self.__position_clip_resample_freq

    @position_clip_resample_freq.setter
    def position_clip_resample_freq(self, position_clip_resample_freq):
        self.__position_clip_resample_freq = position_clip_resample_freq

    @property
    def position_clip_period_shift(self):
        return self.__position_clip_period_shift

    @position_clip_period_shift.setter
    def position_clip_period_shift(self, position_clip_period_shift):
        self.__position_clip_period_shift = position_clip_period_shift

    ##### stop loss and take profit

    @property
    def stop_loss(self): return self.__stop_loss

    @stop_loss.setter
    def stop_loss(self, stop_loss): self.__stop_loss = stop_loss

    @property
    def take_profit(self): return self.__take_profit

    @take_profit.setter
    def take_profit(self, take_profit): self.__take_profit = take_profit

    ##### tech indicators and spot bp tc

    @property
    def tech_params(self): return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params): self.__tech_params = tech_params

    @property
    def spot_tc_bp(self): return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert basis points into a decimal proportion (bp / 10000), halved
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    #### FOR FUTURE USE ###

    @property
    def signal_name(self): return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name): self.__signal_name = signal_name

    @property
    def asset(self): return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self): return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            self.logger.warning(instrument
                                + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def signal_delay(self): return self.__signal_delay

    @signal_delay.setter
    def signal_delay(self, signal_delay): self.__signal_delay = signal_delay
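# A minimal configuration sketch (parameter values are illustrative, not
# recommendations):
#
#     br = BacktestRequest()
#     br.spot_tc_bp = 0.5                     # transaction cost in bp
#     br.portfolio_vol_adjust = True
#     br.portfolio_vol_target = 0.1           # 10% annualised vol target
#     br.portfolio_vol_rebalance_freq = 'BM'  # rebalance leverage month-end
#     br.signal_vol_adjust = True
#     br.signal_vol_target = 0.1
#     br.max_net_exposure = 3.0               # cap whole-portfolio net exposure
#     br.signal_delay = 1                     # lag signals by one period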
def write_time_series_cache_to_disk(
        self, fname, data_frame,
        engine='hdf5_fixed', append_data=False,
        db_server=constants.db_server,
        db_port=constants.db_port,
        username=constants.db_username,
        password=constants.db_password,
        filter_out_matching=None, timeout=10,
        use_cache_compression=constants.use_cache_compression,
        parquet_compression=constants.parquet_compression,
        md_request=None, ticker=None):
    """Writes a Pandas data frame to disk as Parquet, HDF5 or bcolz format,
    or to Arctic/MongoDB, Redis or CSV

    Parameters
    ----------
    fname : str
        path of file
    data_frame : DataFrame
        data frame to be written to disk
    engine : str
        'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append
            to this
        'hdf5_table' - use HDF5 table format, slower but can append to
        'parquet' - use Parquet
        'arctic' - use Arctic/MongoDB database
        'redis' - use Redis
    append_data : bool
        False - write a fresh copy of data on disk each time
        True - append data to disk
    db_server : str
        Database server for Arctic (default: '127.0.0.1')
    timeout : int
        Number of seconds to do timeout
    """
    logger = LoggerManager().getLogger(__name__)

    if md_request is not None:
        fname = self.path_join(
            fname, md_request.create_category_key(ticker=ticker))

    # default HDF5 format
    hdf5_format = 'fixed'

    if 'hdf5' in engine:
        hdf5_format = engine.split('_')[1]
        engine = 'hdf5'

    if engine == 'bcolz':
        # convert invalid characters to substitutes (which bcolz can't deal with)
        data_frame.columns = self.find_replace_chars(
            data_frame.columns, _invalid_chars, _replace_chars)
        data_frame.columns = ['A_' + x for x in data_frame.columns]

        data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

        bcolzpath = self.get_bcolz_filename(fname)
        shutil.rmtree(bcolzpath, ignore_errors=True)
        zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
    elif engine == 'redis':

        fname = os.path.basename(fname).replace('.', '_')

        # Will fail if Redis is not installed
        try:
            r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                  socket_timeout=timeout,
                                  socket_connect_timeout=timeout)

            ping = r.ping()

            # If Redis is alive, try pushing to it
            if ping:
                if data_frame is not None:
                    if isinstance(data_frame, pandas.DataFrame):
                        mem = data_frame.memory_usage(deep=True).sum()
                        mem_float = round(float(mem) / (1024.0 * 1024.0), 3)

                        if mem_float < 500:
                            # msgpack/blosc is deprecated
                            # r.set(fname, data_frame.to_msgpack(compress='blosc'))

                            # now uses pyarrow
                            context = pa.default_serialization_context()
                            ser = context.serialize(data_frame).to_buffer()

                            if use_cache_compression:
                                comp = pa.compress(ser, codec='lz4',
                                                   asbytes=True)
                                siz = len(ser)  # siz = 3912

                                r.set('comp_' + str(siz) + '_' + fname, comp)
                            else:
                                r.set(fname, ser.to_pybytes())

                            logger.info("Pushed " + fname + " to Redis")
                        else:
                            logger.warning("Did not push " + fname
                                           + " to Redis, given size")
                else:
                    logger.info("Object " + fname
                                + " is empty, not pushed to Redis.")
            else:
                logger.warning("Didn't push " + fname
                               + " to Redis given not running")

        except Exception as e:
            logger.warning("Couldn't push " + fname + " to Redis: " + str(e))

    elif engine == 'arctic':
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000
        fname = os.path.basename(fname).replace('.', '_')

        logger.info('Load Arctic/MongoDB library: ' + fname)

        if username is not None and password is not None:
            c = pymongo.MongoClient(
                host="mongodb://" + username + ":" + password + "@"
                     + str(db_server) + ":" + str(db_port),
                connect=False)  # , username=username, password=password)
        else:
            c = pymongo.MongoClient(
                host="mongodb://" + str(db_server) + ":" + str(db_port),
                connect=False)

        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS,
                       connectTimeoutMS=socketTimeoutMS)

        database = None

        try:
            database = store[fname]
        except:
            pass

        if database is None:
            store.initialize_library(fname, audit=False)
            logger.info("Created MongoDB library: " + fname)
        else:
            logger.info("Got MongoDB library: " + fname)

        # Access the library
        library = store[fname]

        if 'intraday' in fname:
            data_frame = data_frame.astype('float32')

        if filter_out_matching is not None:
            cols = data_frame.columns

            new_cols = []

            for col in cols:
                if filter_out_matching not in col:
                    new_cols.append(col)

            data_frame = data_frame[new_cols]

        # Problems with Arctic when writing timezone to disk sometimes, so strip
        data_frame = data_frame.copy().tz_localize(None)

        try:
            # Can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            logger.info("Written MongoDB library: " + fname)
        except Exception as e:
            logger.warning("Couldn't write MongoDB library: " + fname + " "
                           + str(e))

    elif engine == 'hdf5':
        h5_filename = self.get_h5_filename(fname)

        # append data only works for HDF5 stored as tables (but this is much
        # slower than fixed format); removes duplicated entries at the end
        if append_data:
            store = pandas.HDFStore(h5_filename, format=hdf5_format,
                                    complib="zlib", complevel=9)

            if 'intraday' in fname:
                data_frame = data_frame.astype('float32')

            # get last row which matches and remove everything after that
            # (because the append function doesn't check for duplicated rows)
            nrows = len(store['data'].index)
            last_point = data_frame.index[-1]

            i = nrows - 1

            while i > 0:
                read_index = store.select('data', start=i, stop=nrows).index[0]

                if read_index <= last_point:
                    break

                i = i - 1

            # remove rows at the end, which are duplicates of the incoming
            # time series
            store.remove(key='data', start=i, stop=nrows)
            store.put(key='data', value=data_frame, format=hdf5_format,
                      append=True)
            store.close()
        else:
            h5_filename_temp = self.get_h5_filename(fname + ".temp")

            # delete the old copy
            try:
                os.remove(h5_filename_temp)
            except:
                pass

            store = pandas.HDFStore(h5_filename_temp, complib="zlib",
                                    complevel=9)

            if 'intraday' in fname:
                data_frame = data_frame.astype('float32')

            store.put(key='data', value=data_frame, format=hdf5_format)
            store.close()

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

            # once written to disk rename
            os.rename(h5_filename_temp, h5_filename)

        logger.info("Written HDF5: " + fname)

    elif engine == 'parquet':
        if '.parquet' not in fname:
            if fname[-5:] != '.gzip':
                fname = fname + '.parquet'

        self.to_parquet(data_frame, fname,
                        aws_region=constants.aws_region,
                        parquet_compression=parquet_compression)
        # data_frame.to_parquet(fname, compression=parquet_compression)

        logger.info("Written Parquet: " + fname)
    elif engine == 'csv':
        if '.csv' not in fname:
            fname = fname + '.csv'

        data_frame.to_csv(fname)

        logger.info("Written CSV: " + fname)
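# A minimal usage sketch of this newer writer (assumes 'constants' supplies
# the Redis/Parquet defaults referenced above and that 'to_parquet' and
# 'path_join' are defined elsewhere on IOEngine; file names illustrative):
#
#     io = IOEngine()
#     io.write_time_series_cache_to_disk("cache/eurusd_daily", df,
#                                        engine="parquet")
#     # -> writes cache/eurusd_daily.parquet using constants.parquet_compression
#     io.write_time_series_cache_to_disk("eurusd_daily", df, engine="redis")
#     # -> serialises with pyarrow and pushes to Redis only if it answers PING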
def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed',
                                     db_server='127.0.0.1', db_port='6379',
                                     timeout=10, username=None,
                                     password=None):

    logger = LoggerManager().getLogger(__name__)

    if 'hdf5' in engine:
        engine = 'hdf5'

    if engine == 'bcolz':
        # convert invalid characters to substitutes (which bcolz can't deal with)
        pass
    elif engine == 'redis':
        fname = os.path.basename(fname).replace('.', '_')

        try:
            r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                  socket_timeout=timeout,
                                  socket_connect_timeout=timeout)

            if fname == 'flush_all_keys':
                r.flushall()
            else:
                # allow deletion of keys by pattern matching
                x = r.keys('*' + fname)

                if len(x) > 0:
                    r.delete(*x)

                # r.delete(fname)

        except Exception as e:
            logger.warning("Cannot delete non-existent key " + fname
                           + " in Redis: " + str(e))

    elif engine == 'arctic':
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000
        fname = os.path.basename(fname).replace('.', '_')

        logger.info('Load MongoDB library: ' + fname)

        if username is not None and password is not None:
            c = pymongo.MongoClient(
                host="mongodb://" + username + ":" + password + "@"
                     + str(db_server) + ":" + str(db_port),
                connect=False)  # , username=username, password=password)
        else:
            c = pymongo.MongoClient(
                host="mongodb://" + str(db_server) + ":" + str(db_port),
                connect=False)

        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS,
                       connectTimeoutMS=socketTimeoutMS)

        store.delete_library(fname)

        c.close()

        logger.info("Deleted MongoDB library: " + fname)

    elif engine == 'hdf5':
        h5_filename = self.get_h5_filename(fname)

        # delete the old copy
        try:
            os.remove(h5_filename)
        except:
            pass
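# Usage sketch (key names illustrative): delete one cached object from Redis,
# or flush every key in the cache:
#
#     io = IOEngine()
#     io.remove_time_series_cache_on_disk("eurusd_daily", engine="redis")
#     io.remove_time_series_cache_on_disk("flush_all_keys", engine="redis")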
class MarketDataRequest(object):
    # properties
    #
    # data_source (eg. bbg, yahoo, quandl)
    # start_date
    # finish_date
    # tickers (can be list) eg. EURUSD
    # category (eg. fx, equities, fixed_income, cal_event, fundamental)
    # freq_mult (eg. 1)
    # freq
    # gran_freq (minute, hourly, daily, weekly, monthly, yearly)
    # fields (can be list)
    # vendor_tickers (optional)
    # vendor_fields (optional)
    # cache_algo (eg. internet, disk, memory) - internet will forcibly
    #   download from the internet
    # environment (eg. prod, backtest) - old data is saved with prod,
    #   backtest will overwrite the last data point

    def __init__(self, data_source=None,
                 start_date='year', finish_date=None,
                 tickers=None, category=None,
                 freq_mult=1, freq="daily", gran_freq=None,
                 cut="NYC",
                 fields=['close'],
                 cache_algo="internet_load_return",
                 vendor_tickers=None, vendor_fields=None,
                 environment="backtest", trade_side='trade'):

        self.logger = LoggerManager().getLogger(__name__)

        # None => "now" at call time (a default of datetime.datetime.utcnow()
        # in the signature would be evaluated only once, at import time)
        if finish_date is None:
            finish_date = datetime.datetime.utcnow()

        # define frequency of data
        self.gran_freq = gran_freq
        self.freq_mult = freq_mult
        self.freq = freq

        # data source, start and finish dates
        self.data_source = data_source
        self.start_date = start_date
        self.finish_date = finish_date

        self.tickers = tickers
        self.category = category    # special predefined categories

        self.cut = cut              # closing time of the data (eg. NYC, LDN, TOK etc)
        self.fields = fields        # fields, eg. close, high, low, open

        # internet_load_return (cache_algo_return is for future use)
        self.cache_algo = cache_algo

        self.vendor_tickers = vendor_tickers    # define vendor tickers
        self.vendor_fields = vendor_fields      # define vendor fields

        # backtest environment only supported at present
        self.environment = environment
        self.trade_side = trade_side

    @property
    def data_source(self):
        return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        try:
            valid_data_source = ['ats', 'bloomberg', 'dukascopy', 'fred',
                                 'gain', 'google', 'quandl', 'yahoo']

            if data_source not in valid_data_source:
                self.logger.warning(
                    data_source + " is not a defined data source.")
        except:
            pass

        self.__data_source = data_source

    @property
    def category(self):
        return self.__category

    @category.setter
    def category(self, category):
        self.__category = category

    @property
    def tickers(self):
        return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        if tickers is not None:
            if not isinstance(tickers, list):
                tickers = [tickers]

        self.__tickers = tickers

    @property
    def fields(self):
        return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if field_entry not in valid_fields:
                # self.logger.warning(field_entry + " is not a valid field.")
                # add error checking
                pass

        self.__fields = fields

    @property
    def vendor_tickers(self):
        return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        if vendor_tickers is not None:
            if not isinstance(vendor_tickers, list):
                vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self):
        return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if vendor_fields is not None:
            if not isinstance(vendor_fields, list):
                vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self):
        return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = ['tick', 'second', 'minute', 'intraday', 'hourly',
                      'daily', 'weekly', 'monthly', 'quarterly', 'annually']

        if freq not in valid_freq:
            self.logger.warning(freq + " is not a defined frequency")

        self.__freq = freq

    @property
    def gran_freq(self):
        return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        try:
            gran_freq = gran_freq.lower()

            valid_gran_freq = ['tick', 'second', 'minute', 'hourly',
                               'pseudodaily', 'daily', 'weekly', 'monthly',
                               'quarterly', 'annually']

            if gran_freq not in valid_gran_freq:
                self.logger.warning(
                    gran_freq + " is not a defined frequency")

            if gran_freq in ['minute', 'hourly']:
                self.__freq = 'intraday'
            elif gran_freq in ['tick', 'second']:
                self.__freq = 'tick'
            else:
                self.__freq = 'daily'
        except:
            pass

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self):
        return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult):
        self.__freq_mult = freq_mult

    @property
    def start_date(self):
        return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self):
        return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self):
        return self.__cut

    @cut.setter
    def cut(self, cut):
        self.__cut = cut

    def date_parser(self, date):
        if isinstance(date, str):
            date1 = datetime.datetime.utcnow()

            # relative date keywords (compare strings with ==, not 'is')
            if date == 'midnight':
                date1 = datetime.datetime(
                    date1.year, date1.month, date1.day, 0, 0, 0)
            elif date == 'decade':
                date1 = date1 - timedelta(days=360 * 10)
            elif date == 'year':
                date1 = date1 - timedelta(days=360)
            elif date == 'month':
                date1 = date1 - timedelta(days=30)
            elif date == 'week':
                date1 = date1 - timedelta(days=7)
            elif date == 'day':
                date1 = date1 - timedelta(days=1)
            elif date == 'hour':
                date1 = date1 - timedelta(hours=1)
            else:
                # otherwise try each explicit date format in turn

                # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M'
                try:
                    date1 = datetime.datetime.strptime(date, '%b %d %Y %H:%M')
                except:
                    pass

                # format expected '1 Jun 2005 01:33', '%d %b %Y %H:%M'
                try:
                    date1 = datetime.datetime.strptime(date, '%d %b %Y %H:%M')
                except:
                    pass

                try:
                    date1 = datetime.datetime.strptime(date, '%b %d %Y')
                except:
                    pass

                try:
                    date1 = datetime.datetime.strptime(date, '%d %b %Y')
                except:
                    pass
        else:
            date1 = date

        return date1

    @property
    def cache_algo(self):
        return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = ['internet_load', 'internet_load_return',
                            'cache_algo', 'cache_algo_return']

        if cache_algo not in valid_cache_algo:
            self.logger.warning(
                cache_algo + " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self):
        return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment = ['prod', 'backtest']

        if environment not in valid_environment:
            self.logger.warning(environment + " is not a defined environment.")

        self.__environment = environment

    @property
    def trade_side(self):
        return self.__trade_side

    @trade_side.setter
    def trade_side(self, trade_side):
        trade_side = trade_side.lower()

        valid_trade_side = ['trade', 'bid', 'ask']

        if trade_side not in valid_trade_side:
            self.logger.warning(trade_side + " is not a defined trade side.")

        self.__trade_side = trade_side
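# Illustrative usage sketch (an assumption for documentation purposes, not
# part of the library source): a request for a year of daily EUR/USD closes.
# Any entry from valid_data_source could be substituted; the vendor ticker
# shown is hypothetical, and whether data is actually returned depends on
# the downloader that consumes this request.
#
# md_request = MarketDataRequest(
#     data_source="quandl",            # see valid_data_source above
#     start_date="year",               # relative keyword handled by date_parser
#     tickers=["EURUSD"],              # scalars are wrapped into a list by the setter
#     vendor_tickers=["ECB/EURUSD"],   # hypothetical vendor symbol
#     fields=["close"],
#     freq="daily",
#     cut="NYC",
#     cache_algo="internet_load_return")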
class BacktestRequest(MarketDataRequest):
    """Contains parameters necessary to define a backtest, including start
    date, finish date, transaction cost, etc.

    Used by TradingModel and Backtest to construct backtested returns for
    trading strategies
    """

    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None

        # output parameters for backtest (should we add returns statistics
        # on legends, write CSVs with returns etc.)
        self.__plot_start = None
        self.__calc_stats = True
        self.__write_csv = False
        self.__write_csv_pnl = False
        self.__plot_interim = False
        self.__include_benchmark = False

        self.__tech_params = TechParams()

        # default parameters for portfolio level vol adjustment
        self.__portfolio_vol_adjust = False
        self.__portfolio_vol_period_shift = 0
        self.__portfolio_vol_rebalance_freq = None
        self.__portfolio_vol_resample_freq = None
        self.__portfolio_vol_resample_type = 'mean'
        self.__portfolio_vol_target = 0.1           # 10% vol target
        self.__portfolio_vol_max_leverage = None
        self.__portfolio_vol_periods = 20
        self.__portfolio_vol_obs_in_year = 252

        # default parameters for signal level vol adjustment
        self.__signal_vol_adjust = False
        self.__signal_vol_period_shift = 0
        self.__signal_vol_rebalance_freq = None
        self.__signal_vol_resample_freq = None
        self.__signal_vol_resample_type = 'mean'
        self.__signal_vol_target = 0.1              # 10% vol target
        self.__signal_vol_max_leverage = None
        self.__signal_vol_periods = 20
        self.__signal_vol_obs_in_year = 252

        # portfolio notional size
        self.__portfolio_notional_size = None
        self.__portfolio_combination = None
        self.__portfolio_combination_weights = None

        # parameters for maximum position limits (expressed as whole
        # portfolio)
        self.__max_net_exposure = None
        self.__max_abs_exposure = None
        self.__position_clip_rebalance_freq = None
        self.__position_clip_resample_freq = None
        # by default apply max position criterion on last business day of
        # month
        self.__position_clip_resample_type = 'mean'
        self.__position_clip_period_shift = 0

        # take profit and stop loss parameters
        self.__take_profit = None
        self.__stop_loss = None

        # should we delay the signal?
        self.__signal_delay = 0

    ##### properties for output of the backtest
    @property
    def plot_start(self):
        return self.__plot_start

    @plot_start.setter
    def plot_start(self, plot_start):
        self.__plot_start = plot_start

    @property
    def calc_stats(self):
        return self.__calc_stats

    @calc_stats.setter
    def calc_stats(self, calc_stats):
        self.__calc_stats = calc_stats

    @property
    def write_csv(self):
        return self.__write_csv

    @write_csv.setter
    def write_csv(self, write_csv):
        self.__write_csv = write_csv

    @property
    def write_csv_pnl(self):
        return self.__write_csv_pnl

    @write_csv_pnl.setter
    def write_csv_pnl(self, write_csv_pnl):
        self.__write_csv_pnl = write_csv_pnl

    @property
    def plot_interim(self):
        return self.__plot_interim

    @plot_interim.setter
    def plot_interim(self, plot_interim):
        self.__plot_interim = plot_interim

    @property
    def include_benchmark(self):
        return self.__include_benchmark

    @include_benchmark.setter
    def include_benchmark(self, include_benchmark):
        self.__include_benchmark = include_benchmark

    ##### properties for portfolio level volatility adjustment
    @property
    def portfolio_vol_adjust(self):
        return self.__portfolio_vol_adjust

    @portfolio_vol_adjust.setter
    def portfolio_vol_adjust(self, portfolio_vol_adjust):
        self.__portfolio_vol_adjust = portfolio_vol_adjust

    @property
    def portfolio_vol_rebalance_freq(self):
        return self.__portfolio_vol_rebalance_freq

    @portfolio_vol_rebalance_freq.setter
    def portfolio_vol_rebalance_freq(self, portfolio_vol_rebalance_freq):
        self.__portfolio_vol_rebalance_freq = portfolio_vol_rebalance_freq

    @property
    def portfolio_vol_resample_type(self):
        return self.__portfolio_vol_resample_type

    @portfolio_vol_resample_type.setter
    def portfolio_vol_resample_type(self, portfolio_vol_resample_type):
        self.__portfolio_vol_resample_type = portfolio_vol_resample_type

    @property
    def portfolio_vol_resample_freq(self):
        return self.__portfolio_vol_resample_freq

    @portfolio_vol_resample_freq.setter
    def portfolio_vol_resample_freq(self, portfolio_vol_resample_freq):
        self.__portfolio_vol_resample_freq = portfolio_vol_resample_freq

    @property
    def portfolio_vol_period_shift(self):
        return self.__portfolio_vol_period_shift

    @portfolio_vol_period_shift.setter
    def portfolio_vol_period_shift(self, portfolio_vol_period_shift):
        self.__portfolio_vol_period_shift = portfolio_vol_period_shift

    @property
    def portfolio_vol_target(self):
        return self.__portfolio_vol_target

    @portfolio_vol_target.setter
    def portfolio_vol_target(self, portfolio_vol_target):
        self.__portfolio_vol_target = portfolio_vol_target

    @property
    def portfolio_vol_max_leverage(self):
        return self.__portfolio_vol_max_leverage

    @portfolio_vol_max_leverage.setter
    def portfolio_vol_max_leverage(self, portfolio_vol_max_leverage):
        self.__portfolio_vol_max_leverage = portfolio_vol_max_leverage

    @property
    def portfolio_vol_periods(self):
        return self.__portfolio_vol_periods

    @portfolio_vol_periods.setter
    def portfolio_vol_periods(self, portfolio_vol_periods):
        self.__portfolio_vol_periods = portfolio_vol_periods

    @property
    def portfolio_vol_obs_in_year(self):
        return self.__portfolio_vol_obs_in_year

    @portfolio_vol_obs_in_year.setter
    def portfolio_vol_obs_in_year(self, portfolio_vol_obs_in_year):
        self.__portfolio_vol_obs_in_year = portfolio_vol_obs_in_year

    ##### properties for signal level vol adjustment
    @property
    def signal_vol_adjust(self):
        return self.__signal_vol_adjust

    @signal_vol_adjust.setter
    def signal_vol_adjust(self, signal_vol_adjust):
        self.__signal_vol_adjust = signal_vol_adjust

    @property
    def signal_vol_rebalance_freq(self):
        return self.__signal_vol_rebalance_freq

    @signal_vol_rebalance_freq.setter
    def signal_vol_rebalance_freq(self, signal_vol_rebalance_freq):
        self.__signal_vol_rebalance_freq = signal_vol_rebalance_freq

    @property
    def signal_vol_resample_type(self):
        return self.__signal_vol_resample_type

    @signal_vol_resample_type.setter
    def signal_vol_resample_type(self, signal_vol_resample_type):
        self.__signal_vol_resample_type = signal_vol_resample_type

    @property
    def signal_vol_resample_freq(self):
        return self.__signal_vol_resample_freq

    @signal_vol_resample_freq.setter
    def signal_vol_resample_freq(self, signal_vol_resample_freq):
        self.__signal_vol_resample_freq = signal_vol_resample_freq

    @property
    def signal_vol_period_shift(self):
        return self.__signal_vol_period_shift

    @signal_vol_period_shift.setter
    def signal_vol_period_shift(self, signal_vol_period_shift):
        self.__signal_vol_period_shift = signal_vol_period_shift

    @property
    def signal_vol_target(self):
        return self.__signal_vol_target

    @signal_vol_target.setter
    def signal_vol_target(self, signal_vol_target):
        self.__signal_vol_target = signal_vol_target

    @property
    def signal_vol_max_leverage(self):
        return self.__signal_vol_max_leverage

    @signal_vol_max_leverage.setter
    def signal_vol_max_leverage(self, signal_vol_max_leverage):
        self.__signal_vol_max_leverage = signal_vol_max_leverage

    @property
    def signal_vol_periods(self):
        return self.__signal_vol_periods

    @signal_vol_periods.setter
    def signal_vol_periods(self, signal_vol_periods):
        self.__signal_vol_periods = signal_vol_periods

    @property
    def signal_vol_obs_in_year(self):
        return self.__signal_vol_obs_in_year

    @signal_vol_obs_in_year.setter
    def signal_vol_obs_in_year(self, signal_vol_obs_in_year):
        self.__signal_vol_obs_in_year = signal_vol_obs_in_year

    ##### portfolio notional size
    @property
    def portfolio_notional_size(self):
        return self.__portfolio_notional_size

    @portfolio_notional_size.setter
    def portfolio_notional_size(self, portfolio_notional_size):
        self.__portfolio_notional_size = float(portfolio_notional_size)

    ##### portfolio combination style (sum, mean, weighted, weighted-sum)
    @property
    def portfolio_combination(self):
        return self.__portfolio_combination

    @portfolio_combination.setter
    def portfolio_combination(self, portfolio_combination):
        self.__portfolio_combination = portfolio_combination

    ##### portfolio weights (sum, mean)
    @property
    def portfolio_combination_weights(self):
        return self.__portfolio_combination_weights

    @portfolio_combination_weights.setter
    def portfolio_combination_weights(self, portfolio_combination_weights):
        self.__portfolio_combination_weights = portfolio_combination_weights

    ##### properties for maximum position constraints
    @property
    def max_net_exposure(self):
        return self.__max_net_exposure

    @max_net_exposure.setter
    def max_net_exposure(self, max_net_exposure):
        self.__max_net_exposure = max_net_exposure

    @property
    def max_abs_exposure(self):
        return self.__max_abs_exposure

    @max_abs_exposure.setter
    def max_abs_exposure(self, max_abs_exposure):
        self.__max_abs_exposure = max_abs_exposure

    @property
    def position_clip_rebalance_freq(self):
        return self.__position_clip_rebalance_freq

    @position_clip_rebalance_freq.setter
    def position_clip_rebalance_freq(self, position_clip_rebalance_freq):
        self.__position_clip_rebalance_freq = position_clip_rebalance_freq

    @property
    def position_clip_resample_type(self):
        return self.__position_clip_resample_type

    @position_clip_resample_type.setter
    def position_clip_resample_type(self, position_clip_resample_type):
        self.__position_clip_resample_type = position_clip_resample_type

    @property
    def position_clip_resample_freq(self):
        return self.__position_clip_resample_freq

    @position_clip_resample_freq.setter
    def position_clip_resample_freq(self, position_clip_resample_freq):
        self.__position_clip_resample_freq = position_clip_resample_freq

    @property
    def position_clip_period_shift(self):
        return self.__position_clip_period_shift

    @position_clip_period_shift.setter
    def position_clip_period_shift(self, position_clip_period_shift):
        self.__position_clip_period_shift = position_clip_period_shift

    ##### stop loss and take profit
    @property
    def stop_loss(self):
        return self.__stop_loss

    @stop_loss.setter
    def stop_loss(self, stop_loss):
        self.__stop_loss = stop_loss

    @property
    def take_profit(self):
        return self.__take_profit

    @take_profit.setter
    def take_profit(self, take_profit):
        self.__take_profit = take_profit

    ##### tech indicators and spot bp tc
    @property
    def tech_params(self):
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # store as a fraction of notional: divide by 10^4 to convert from
        # basis points, and by 2 to halve the quoted cost
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    #### FOR FUTURE USE ###
    @property
    def signal_name(self):
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            self.logger.warning(
                instrument + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def signal_delay(self):
        return self.__signal_delay

    @signal_delay.setter
    def signal_delay(self, signal_delay):
        self.__signal_delay = signal_delay
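# Illustrative usage sketch (an assumption for documentation purposes, not
# part of the library source): shows how a backtest might be parameterised
# with the attributes defined above. It only touches this module's own
# classes; TradingModel and Backtest (not shown here) are what actually
# consume these settings. The guard keeps it from running on import.
if __name__ == "__main__":
    br = BacktestRequest()

    br.start_date = "01 Jan 2010"      # parsed via date_parser ('%d %b %Y')
    br.finish_date = datetime.datetime.utcnow()

    br.spot_tc_bp = 0.5                # 0.5bp quoted cost, stored as 0.5 / 20000
    br.portfolio_vol_adjust = True
    br.portfolio_vol_target = 0.10     # target 10% annualised portfolio vol
    br.signal_vol_adjust = True
    br.signal_vol_target = 0.10

    print(br.spot_tc_bp)               # -> 2.5e-05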