class TwitterPyThalesians:

    def __init__(self, *args, **kwargs):
        self.logger = LoggerManager().getLogger(__name__)

    def set_key(self, APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET):
        self.twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)

    def auto_set_key(self):
        self.twitter = Twython(Constants().APP_KEY, Constants().APP_SECRET,
                               Constants().OAUTH_TOKEN, Constants().OAUTH_TOKEN_SECRET)

    def update_status(self, msg, link=None, picture=None):
        # 22 chars per URL ('link' is the number of URLs in the message)
        # 23 chars per picture
        chars_lim = 140

        if link is not None: chars_lim = chars_lim - (22 * link)
        if picture is not None: chars_lim = chars_lim - 23

        if (len(msg) > chars_lim):
            self.logger.info("Message too long for Twitter!")

        if picture is None:
            self.twitter.update_status(status=msg)
        else:
            photo = open(picture, 'rb')
            self.twitter.update_status_with_media(status=msg, media=photo)
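# A minimal usage sketch, assuming Twython is installed and valid Twitter API
# credentials are to hand (the key strings below are placeholders, not real keys).
if __name__ == '__main__':
    twitter = TwitterPyThalesians()
    twitter.set_key('APP_KEY', 'APP_SECRET', 'OAUTH_TOKEN', 'OAUTH_TOKEN_SECRET')
    twitter.update_status("140 characters or less...")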
class LoaderPandasWeb(LoaderTemplate):

    def __init__(self):
        super(LoaderPandasWeb, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Pandas Web data")

        data_frame = self.download_daily(time_series_request_vendor)
        data_frame = data_frame.to_frame().unstack()

        print(data_frame.tail())

        # note: "index is []" always evaluates False, so test for an empty index instead
        if len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            ticker_requested = []

            for f in time_series_request.fields:
                for t in time_series_request.tickers:
                    ticker_requested.append(t + "." + f)

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

            # only return the requested tickers
            data_frame = pandas.DataFrame(data=data_frame[ticker_requested],
                                          index=data_frame.index, columns=ticker_requested)

        self.logger.info("Completed request from Pandas Web.")

        return data_frame

    def download_daily(self, time_series_request):
        return web.DataReader(time_series_request.tickers, time_series_request.data_source,
                              time_series_request.start_date, time_series_request.finish_date)
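# A sketch of the underlying pandas-datareader call that download_daily wraps,
# assuming the pandas_datareader package is installed (in older pandas versions
# this API lived at pandas.io.data); the FRED ticker below is illustrative only.
import datetime
import pandas_datareader.data as web

df = web.DataReader('DEXUSEU', 'fred', datetime.date(2015, 1, 1), datetime.date(2015, 6, 1))
print(df.tail())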
class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        # note: "index is []" always evaluates False, so test for an empty index instead
        if len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

        if data_frame is not None:
            # tidy up tickers into a format that is more easily translatable
            returned_tickers = [x.replace(' - Value', '') for x in returned_tickers]
            returned_tickers = [x.replace('.', '/') for x in returned_tickers]

            fields = self.translate_from_vendor_field(['close' for x in returned_tickers],
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        return Quandl.get(time_series_request.tickers, authtoken=Constants().quandl_api_key,
                          trim_start=time_series_request.start_date,
                          trim_end=time_series_request.finish_date)
class WebDataTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def download_raw_data(self): return

    @abc.abstractmethod
    def construct_indicator(self): return

    def dump_indicator(self):
        indicator_group = self.raw_indicator
        # self.raw_indicator.join(self.processed_indicator, how='outer')

        self.logger.info("About to write all web indicators")

        indicator_group.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y %H:%M:%S')
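# A minimal concrete subclass sketch: the URL, dump path and smoothing step are
# assumptions for illustration, not part of the library.
class WebDataExample(WebDataTemplate):

    def __init__(self):
        super(WebDataExample, self).__init__()
        self._csv_indicator_dump = 'web_indicator_dump.csv'     # hypothetical path

    def download_raw_data(self):
        # any DataFrame-producing download would do here (hypothetical endpoint)
        self.raw_data = pandas.read_csv('http://example.com/indicator.csv',
                                        index_col=0, parse_dates=True)

    def construct_indicator(self):
        # eg. smooth the raw series into an indicator
        self.raw_indicator = self.raw_data.rolling(window=20).mean()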
class DataLoaderTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    def load_database(self, key=None):
        tsio = TimeSeriesIO()
        tsc = TimeSeriesCalcs()

        file = self._hdf5

        if key is not None:
            file = self._hdf5 + key + ".h5"

        # if cached file exists, use that, otherwise load CSV
        if os.path.isfile(file):
            self.logger.info("About to load market database from HDF5...")
            self.news_database = tsio.read_time_series_cache_from_disk(file)
            self.news_database = self.preprocess(self.news_database)
        else:
            self.logger.info("About to load market database from CSV...")
            self.news_database = self.load_csv()

        return self.news_database

    @abc.abstractmethod
    def load_csv(self): return

    def get_database(self, key):
        return self.news_database

    @abc.abstractmethod
    def preprocess(self, df): return
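# A minimal concrete subclass sketch: the CSV path, cache prefix and
# preprocessing step are illustrative assumptions, not part of the library.
class NewsDataLoader(DataLoaderTemplate):

    def __init__(self):
        super(NewsDataLoader, self).__init__()
        self._hdf5 = 'news_database'        # hypothetical cache prefix

    def load_csv(self):
        return pandas.read_csv('news_database.csv', index_col=0, parse_dates=True)

    def preprocess(self, df):
        # eg. drop fully empty rows before handing back the cached frame
        return df.dropna(how='all')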
class CreateDataIndexTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def create_indicator(self): return

    @abc.abstractmethod
    def aggregate_news_data(self, raw_database): return

    @abc.abstractmethod
    def get_cached_aggregate(self): return

    def grab_indicator(self):
        return self.indicator

    def grab_econ_indicator(self):
        return self.indicator_econ

    def grab_final_indicator(self):
        return self.indicator_final

    def truncate_indicator(self, daily_ind, match):
        cols = daily_ind.columns.values

        to_include = []

        for i in range(0, len(cols)):
            if match in cols[i]:
                to_include.append(i)

        return daily_ind[daily_ind.columns[to_include]]

    def dump_indicators(self):
        tsf = TimeSeriesFilter()

        self.logger.info("About to write all indicators to CSV")
        self.indicator.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y')

        if (self._csv_econ_indicator_dump is not None):
            self.logger.info("About to write economy based indicators to CSV")
            self.indicator_econ.to_csv(self._csv_econ_indicator_dump, date_format='%d/%m/%Y')

        self.logger.info("About to write final indicators to CSV")

        # remove weekends and remove start of series
        if (self._csv_final_indicator_dump is not None):
            indicator_final_copy = tsf.filter_time_series_by_holidays(self.indicator_final,
                                                                      cal='WEEKDAY')
            indicator_final_copy = tsf.filter_time_series_by_date(
                start_date="01 Jan 2000", finish_date=None, data_frame=indicator_final_copy)

            indicator_final_copy.to_csv(self._csv_final_indicator_dump, date_format='%d/%m/%Y')
Constants.time_series_factory_thread_technique = tech

for no in thread_no:
    for key in Constants.time_series_factory_thread_no:
        Constants.time_series_factory_thread_no[key] = no

    import time
    start = time.time()

    df = ltsf.harvest_time_series(time_series_request)

    end = time.time()
    duration = end - start

    diag.append("With " + str(no) + " " + tech + " no: " + str(duration) + " seconds")

for d in diag:
    logger.info(d)

###### download intraday data from Bloomberg for FX, with different threading techniques
if True:
    from datetime import timedelta

    time_series_request = TimeSeriesRequest(
        start_date=datetime.date.today() - timedelta(days=10),     # start date
        finish_date=datetime.date.today(),                         # finish date
        freq='intraday',                                           # intraday data
        data_source='bloomberg',                                   # use Bloomberg as data source
        tickers=['EURUSD',                                         # ticker (Thalesians)
                 'GBPUSD',
                 'USDJPY',
                 'AUDUSD'],
class BBGLowLevelRef(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelRef, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # populate options for Bloomberg reference data request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()
                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        data[(field_name, ticker)][index] = re.findall(r'"(.*?)"', "%s" % row)[0]
                        index = index + 1
                # else:
                #     vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                #     print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")
                print(errorInfo.getElementAsString("category"), ":",
                      fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # an obsolete ticker can return no values
        if (not(data_frame.empty)):
            data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            self.logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0])
                             + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
                not in data_frame.columns.get_level_values(1).values):
            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)    # force GMT time
        self.add_override(request, 'START_DT', self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT', self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request)
class TimeSeriesIO:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """
        write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if (create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None,
                              postfix='.close', intraday_tz='UTC'):
        return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse,
                                        postfix=postfix, intraday_tz=intraday_tz,
                                        excel_sheet=excel_sheet)

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame):
        """
        write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        """

        store = pandas.HDFStore(self.get_h5_filename(fname), complib="blosc", complevel=9)

        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        store['data'] = data_frame
        store.close()

    def get_h5_filename(self, fname):
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)

        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.values

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname):
        """
        read_time_series_cache_from_disk - reads time series cache from disk

        Parameters
        ----------
        fname : str
            file to be read from

        Returns
        -------
        DataFrame
        """

        if os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None,
                            postfix='.close', intraday_tz='UTC', excel_sheet=None):

        if (freq == 'intraday'):
            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int,
                    [x[6:10], x[3:5], x[0:2], x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':   # "is" compares identity; use "==" for strings
                dateparse = lambda x: datetime.datetime(*map(int,
                    [x[0:4], x[5:7], x[8:10], x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True,
                                             date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:
                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates='coerce')
            else:
                if excel_sheet is None:
                    data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"],
                                                 date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0,
                                                   na_values=['NA'])

        # convert Date to Python datetime
        # data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                                       int(x[12:13]), int(x[15:16]), int(x[18:19])))
        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if (freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        self.logger.info("About to read... " + f_name)

        # pass the user's cutoff/dateparse through (the original hardcoded None here,
        # silently discarding both arguments)
        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

        # clean file first if dirty
        if data.count('\x00'):
            self.logger.info('Cleaning CSV...')

            with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                of.write(data.replace('\x00', ''))

            shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
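# A short round-trip sketch for the HDF5 cache (assumes PyTables and blosc are
# installed; the file name 'example_cache' is illustrative).
if __name__ == '__main__':
    import numpy

    tsio = TimeSeriesIO()

    df = pandas.DataFrame(numpy.random.randn(100, 2),
                          index=pandas.date_range('2015-01-01', periods=100),
                          columns=['EURUSD.close', 'GBPUSD.close'])

    tsio.write_time_series_cache_to_disk('example_cache', df)
    df_cached = tsio.read_time_series_cache_from_disk('example_cache')

    print(df_cached.tail())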
class IntradayBarRequest(Request):

    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """
        Intraday bar request for bbg

        Parameters
        ----------
        symbol : string
        interval : number of minutes
        start : start date
        end : end date (if None then use today)
        event : (TRADE, BID, ASK, BEST_BID, BEST_ASK)
        """
        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        if start is None:
            start = datetime.today() - timedelta(30)
        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related
        self.response = defaultdict(list)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        start, end = self.start, self.end

        request = svc.CreateRequest('IntradayBarRequest')
        request.Set('security', self.symbol)
        request.Set('interval', self.interval)
        request.Set('eventType', self.event)
        request.Set('startDateTime',
                    session.CreateDatetime(start.year, start.month, start.day,
                                           start.hour, start.minute))
        request.Set('endDateTime',
                    session.CreateDatetime(end.year, end.month, end.day,
                                           end.hour, end.minute))

        self.logger.info("Fetching intraday data for " + str(self.symbol) + " from "
                         + start.strftime('%d/%m/%Y') + " to " + end.strftime('%d/%m/%Y'))

        return request

    def on_event(self, evt, is_final):
        """
        on_event - invoked in response to COM PumpWaitingMessages - different thread
        """
        response = self.response

        self.logger.debug("Receiving data from Bloomberg...")

        for msg in XmlHelper.message_iter(evt):
            bars = msg.GetElement('barData').GetElement('barTickData')

            self.logger.debug("Read message...")

            for i in range(bars.NumValues):
                bar = bars.GetValue(i)
                ts = bar.GetElement(0).Value

                dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)

                response['time'].append(dt)
                response['open'].append(bar.GetElement(1).Value)
                response['high'].append(bar.GetElement(2).Value)
                response['low'].append(bar.GetElement(3).Value)
                response['close'].append(bar.GetElement(4).Value)
                response['volume'].append(bar.GetElement(5).Value)
                response['events'].append(bar.GetElement(6).Value)

                if (i % 20000 == 0):
                    self.logger.debug("Processing " + dt.strftime('%d/%m/%Y'))

        self.logger.debug("Finished processing for ticker.")

        if is_final:
            idx = response.pop('time')

            self.response = DataFrame(response,
                                      columns=['open', 'high', 'low', 'close', 'volume', 'events'],
                                      index=idx)
            self.response.index.name = 'Date'
            self.response = self.response.astype('float32')
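# The on_event handler accumulates columns in a defaultdict(list) and converts
# them to a DataFrame once the final event arrives; a self-contained sketch of
# that pattern on synthetic bar values (no Bloomberg session required):
from collections import defaultdict
import datetime as dtm
import pandas

response = defaultdict(list)

for i in range(3):
    response['time'].append(dtm.datetime(2015, 1, 1, 9, i))
    response['open'].append(1.20 + i * 0.001)
    response['close'].append(1.20 + i * 0.0015)

idx = response.pop('time')
frame = pandas.DataFrame(response, columns=['open', 'close'], index=idx)
print(frame)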
        return ConfigManager._dict_time_series_fields_list_vendor_to_library[
            source + '.' + sourcefield]

    @staticmethod
    def convert_library_to_vendor_field(source, field):
        return ConfigManager._dict_time_series_fields_list_library_to_vendor[
            source + '.' + field]

## test function
if __name__ == '__main__':
    logger = LoggerManager().getLogger(__name__)

    categories = ConfigManager().get_categories_from_fields()
    logger.info("Categories from fields list")
    print(categories)

    categories = ConfigManager().get_categories_from_tickers()
    logger.info("Categories from tickers list")
    print(categories)

    filter = 'events'
    categories_filtered = ConfigManager().get_categories_from_tickers_selective_filter(filter)
    logger.info("Categories from tickers list, filtered by events")
    print(categories_filtered)

    logger.info("For each category, print all tickers and fields")
# a more robust version of LoaderQuandl: tidies vendor field names and retries failed downloads
class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        # note: "index is []" always evaluates False, so test for an empty index instead
        if data_frame is None or len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

        if data_frame is not None:
            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-')
                               for x in returned_tickers]
            returned_fields = [x.replace('value', 'close') for x in returned_fields]  # special case for close

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0

        data_frame = None

        while (trials < 5):
            try:
                data_frame = Quandl.get(time_series_request.tickers,
                                        authtoken=Constants().quandl_api_key,
                                        trim_start=time_series_request.start_date,
                                        trim_end=time_series_request.finish_date)

                break
            except:
                trials = trials + 1
                self.logger.info("Attempting... " + str(trials)
                                 + " request to download from Quandl")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
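# The download_daily retry loop generalises to any flaky network call; a minimal
# sketch of the same pattern as a reusable helper (the helper name and the call
# in the usage comment are illustrative, not part of the library):
def retry_call(func, max_trials=5):
    for trial in range(1, max_trials + 1):
        try:
            return func()
        except Exception as e:
            print("Attempt " + str(trial) + " failed: " + str(e))

    return None

# usage sketch: retry_call(lambda: Quandl.get('FRED/DEXUSEU'))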
class HistoricalDataRequest(Request):

    def __init__(self, symbols, fields, start=None, end=None, period='DAILY',
                 addtl_sets=None, ignore_security_error=0, ignore_field_error=0):
        """
        Historical data request for bbg.

        Parameters
        ----------
        symbols : string or list
        fields : string or list
        start : start date (if None then use 1 year ago)
        end : end date (if None then use today)
        period : ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')
        ignore_security_error : bool
        ignore_field_error : bool
        """
        Request.__init__(self, ignore_security_error=ignore_security_error,
                         ignore_field_error=ignore_field_error)

        assert period in ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')

        self.symbols = isinstance(symbols, str) and [symbols] or symbols
        self.fields = isinstance(fields, str) and [fields] or fields

        if start is None:
            start = datetime.today() - timedelta(365)   # by default download the past year
        if end is None:
            end = datetime.today()

        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.period = period

        self.logger = LoggerManager().getLogger(__name__)

        # response related
        self.response = {}

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        request = svc.CreateRequest('HistoricalDataRequest')

        [request.GetElement('securities').AppendValue(sec) for sec in self.symbols]
        [request.GetElement('fields').AppendValue(fld) for fld in self.fields]

        request.Set('startDate', self.start.strftime('%Y%m%d'))
        request.Set('endDate', self.end.strftime('%Y%m%d'))
        request.Set('periodicitySelection', self.period)

        o = request.GetElement('overrides').AppendElment()
        o.SetElement('fieldId', 'TIME_ZONE_OVERRIDE')
        o.SetElement('value', 'GMT')

        return request

    def on_security_data_node(self, node):
        """ process a securityData node - FIXME: currently not handling relateDate node """
        sid = XmlHelper.get_child_value(node, 'security')
        farr = node.GetElement('fieldData')
        dmap = defaultdict(list)

        self.logger.info("Fetching ticker " + sid)

        for i in range(farr.NumValues):
            pt = farr.GetValue(i)
            [dmap[f].append(XmlHelper.get_child_value(pt, f)) for f in ['date'] + self.fields]

        self.logger.info("Returning ticker " + sid)

        idx = dmap.pop('date')
        frame = DataFrame(dmap, columns=self.fields, index=idx)
        frame.index.name = 'date'

        self.response[sid] = frame

    def on_event(self, evt, is_final):
        """ on_event - invoked in response to COM PumpWaitingMessages - different thread """

        for msg in XmlHelper.message_iter(evt):
            # single security element in historical request
            node = msg.GetElement('securityData')

            if node.HasElement('securityError'):
                self.security_errors.append(
                    XmlHelper.as_security_error(node.GetElement('securityError')))
            else:
                self.on_security_data_node(node)

    def response_as_single(self, copy=0):
        """ response_as_single - convert the response map to a single data frame with MultiIndex columns """
        arr = []

        for sid, frame in self.response.items():
            if copy:
                frame = frame.copy()

            'security' not in frame and frame.insert(0, 'security', sid)
            arr.append(frame.reset_index().set_index(['date', 'security']))

        if (arr == []):
            return arr

        return concat(arr).unstack()

    def response_as_panel(self, swap=False):
        panel = Panel(self.response)

        if swap:
            panel = panel.swapaxes('items', 'minor')

        return panel
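# response_as_single reshapes per-ticker frames into one frame with MultiIndex
# columns via concat/unstack; a self-contained sketch of that reshaping on
# synthetic data (no Bloomberg connection needed):
import pandas
from pandas import DataFrame, concat

idx = pandas.date_range('2015-01-01', periods=3)
arr = []

for sid in ['EURUSD Curncy', 'GBPUSD Curncy']:
    frame = DataFrame({'PX_LAST': [1.0, 1.1, 1.2]}, index=idx)
    frame.index.name = 'date'
    frame.insert(0, 'security', sid)
    arr.append(frame.reset_index().set_index(['date', 'security']))

single = concat(arr).unstack()   # columns become a (field, security) MultiIndex
print(single)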
br.ann_factor = 252

# have vol target for each signal
br.signal_vol_adjust = True
br.signal_vol_target = 0.05
br.signal_vol_max_leverage = 3
br.signal_vol_periods = 60
br.signal_vol_obs_in_year = 252
br.signal_vol_rebalance_freq = 'BM'
br.signal_vol_resample_freq = None

tech_params = TechParams()
tech_params.sma_period = 200
indicator = 'SMA'

# pick USD crosses in G10 FX
# note: we are calculating returns from spot (it is much better to use total return
# indices for FX, which include carry)

logger.info("Loading asset data...")

tickers = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
           'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                  'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

time_series_request = TimeSeriesRequest(
    start_date="01 Jan 1989",               # start date
    finish_date=datetime.date.today(),      # finish date
    freq='daily',                           # daily data
    data_source='quandl',                   # use Quandl as data source
    tickers=tickers,                        # ticker (Thalesians)
    fields=['close'],                       # which fields to download
    vendor_tickers=vendor_tickers,          # ticker (Quandl)
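# A sketch of what the vol target parameters above imply: realised vol is
# estimated over signal_vol_periods returns, annualised with
# signal_vol_obs_in_year, and leverage is capped at signal_vol_max_leverage
# (illustrative arithmetic only, not the library's internal implementation).
import numpy

returns = numpy.random.randn(60) * 0.006          # 60 daily returns, ~0.6% daily vol
realised_vol = numpy.std(returns) * numpy.sqrt(252)

leverage = min(0.05 / realised_vol, 3)            # target 5% vol, max 3x leverage
print("realised vol = %.3f, leverage = %.2f" % (realised_vol, leverage))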
class BBGLowLevelTick(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelTick, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.TICK_DATA = blpapi.Name("tickData")
        self.COND_CODE = blpapi.Name("conditionCodes")
        self.TICK_SIZE = blpapi.Name("size")
        self.TIME = blpapi.Name("time")
        self.TYPE = blpapi.Name("type")
        self.VALUE = blpapi.Name("value")
        self.RESPONSE_ERROR = blpapi.Name("responseError")
        self.CATEGORY = blpapi.Name("category")
        self.MESSAGE = blpapi.Name("message")
        self.SESSION_TERMINATED = blpapi.Name("SessionTerminated")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset tick request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers[0]    # get 1st ticker only!
        self._options.event = time_series_request.trade_side.upper()
        # self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        # self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)

        self.logger.info("Processing tick data for " + str(self._options.security))

        data_vals = data.values()

        # a plain for loop over the items (also reading the condition code field)
        # works too, but is marginally slower:
        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""

        # slightly faster this way (note, we are skipping trade & CC fields);
        # "tick_tuples" avoids shadowing the built-in "tuple"
        tick_tuples = [([item.getElementAsFloat(self.VALUE),
                         item.getElementAsInteger(self.TICK_SIZE)],
                        item.getElementAsDatetime(self.TIME)) for item in data_vals]

        data_table = list(map(itemgetter(0), tick_tuples))
        time_list = list(map(itemgetter(1), tick_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data=data_table, index=time_list, columns=['close', 'ticksize'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        self.logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request)
class BBGLowLevelIntraday(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers[0]    # get 1st ticker only!
        self._options.event = "TRADE"
        self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        self.logger.info("Processing intraday data for " + str(self._options.security))

        data_vals = list(data.values())

        # a preallocated numpy matrix, or a plain for loop appending each bar to a
        # list, both work too, but are a touch slower than the comprehension below

        # each price time point has multiple fields - marginally quicker;
        # "bar_tuples" avoids shadowing the built-in "tuple"
        bar_tuples = [([bar.getElementAsFloat(self.OPEN),
                        bar.getElementAsFloat(self.HIGH),
                        bar.getElementAsFloat(self.LOW),
                        bar.getElementAsFloat(self.CLOSE),
                        bar.getElementAsInteger(self.VOLUME),
                        bar.getElementAsInteger(self.NUM_EVENTS)],
                       bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_tuples))
        time_list = list(map(itemgetter(1), bar_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data=data_table, index=time_list,
                                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.set("eventType", self._options.event)
        request.set("interval", self._options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        if self._options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request)
class LoaderBBG(LoaderTemplate):

    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None

        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events, times/dates need a separate ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(time_series_request_vendor,
                                                              time_series_request)

                # remove field 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index('release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(time_series_request,
                                                            time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop=False)

                    data_frame = pandas.concat([events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarterly data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            # assume one ticker only
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.download_intraday(time_series_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    self.logger.info("No tickers returned for: "
                                     + time_series_request_vendor.tickers)
                    return None

                cols = data_frame.columns.values

                # tz_localize returns a new frame (the original discarded the result)
                data_frame = data_frame.tz_localize('UTC')

                cols = time_series_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(time_series_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor, time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " + time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame = data_frame.convert_objects(convert_dates='coerce',
                                                    convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self): return

    @abc.abstractmethod
    def download_intraday(self, time_series_request): return

    @abc.abstractmethod
    def download_daily(self, time_series_request): return

    @abc.abstractmethod
    def download_ref(self, time_series_request): return
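# The vendor frames come back with (field, ticker) MultiIndex columns, which the
# loaders flatten into 'ticker.field' names; a self-contained sketch of that step
# (here we skip the vendor-to-library translation and join the raw names directly):
import pandas

columns = pandas.MultiIndex.from_tuples(
    [('PX_LAST', 'EURUSD Curncy'), ('PX_LAST', 'GBPUSD Curncy')],
    names=['field', 'ticker'])

df = pandas.DataFrame([[1.10, 1.50]], columns=columns)

fields = df.columns.get_level_values(0)
tickers = df.columns.get_level_values(1)

df.columns = [tickers[i] + "." + fields[i] for i in range(len(fields))]
print(df.columns.tolist())   # ['EURUSD Curncy.PX_LAST', 'GBPUSD Curncy.PX_LAST']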
class TimeSeriesIO:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """ write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if (create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                book = load_workbook(fname)

                # appending to an existing workbook needs the openpyxl engine
                # (xlsxwriter cannot load or modify an existing file)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet)

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """ write_time_series_to_excel_writer - writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet)

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None,
                              postfix='.close', intraday_tz='UTC'):
        """ read_excel_data_frame - Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        excel_sheet : str
            Excel sheet to be read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str
            timezone of file if it uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse,
                                        postfix=postfix, intraday_tz=intraday_tz, excel_sheet=excel_sheet)

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame, use_bcolz=False):
        """ write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format or bcolz format

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        """

        if (use_bcolz):
            # convert invalid characters to substitutes (which bcolz can't deal with)
            data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        else:
            h5_filename_temp = self.get_h5_filename(fname + ".temp")
            h5_filename = self.get_h5_filename(fname)

            # delete any existing temporary copy (disabled in the original)
            # try:
            #     os.remove(h5_filename_temp)
            # except:
            #     pass

            store = pandas.HDFStore(h5_filename_temp, complib="blosc", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store['data'] = data_frame
            store.close()

            # delete the old copy
            try:
                os.remove(h5_filename)
            except OSError:
                pass

            # once written to disk rename
            os.rename(h5_filename_temp, h5_filename)

    def get_h5_filename(self, fname):
        """ get_h5_filename - Ensures the filename ends with an .h5 extension, adding it if missing

        Parameters
        ----------
        fname : str
            filename to check

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """ get_bcolz_filename - Ensures the filename ends with a .bcolz extension, adding it if missing

        Parameters
        ----------
        fname : str
            filename to check

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """ write_r_compatible_hdf_dataframe - Writes a DataFrame to disk as an R compatible HDF5 file

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            # take a plain list, so it can be concatenated with the list literal below
            fields = list(data_frame32.columns.values)

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname, use_bcolz=False):
        """ read_time_series_cache_from_disk - Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str
            file to be read from

        Returns
        -------
        DataFrame
        """

        if (use_bcolz):
            try:
                name = self.get_bcolz_filename(fname)
                zlens = bcolz.open(rootdir=name)
                data_frame = zlens.todataframe()

                data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                data_frame.index.name = 'Date'
                del data_frame['DTS_']

                # convert invalid characters (which bcolz can't deal with) to more readable characters for pandas
                data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                data_frame.columns = [x[2:] for x in data_frame.columns]

                return data_frame
            except Exception:
                return None
        elif os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None,
                            postfix='.close', intraday_tz='UTC', excel_sheet=None):
        """ read_csv_data_frame - Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str (optional)
            timezone of file if it uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if (freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                # note: compare strings with ==, not "is" (identity)
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True, date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:
                data_frame = pandas.read_csv(f_name)

                # very slow conversion (convert_objects is deprecated in later versions of
                # pandas; pandas.to_datetime on the relevant columns is the modern equivalent)
                data_frame = data_frame.convert_objects(convert_dates='coerce')
            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"],
                                                     date_parser=dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["Date"],
                                                     date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

                # convert Date to Python datetime
                # datetime data_frame['Date1'] = data_frame.index
                # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
                # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
                #                                                       int(x[12:13]), int(x[15:16]), int(x[18:19])))
                # data_frame.index = data_frame['Date1']
                # data_frame.drop('Date1')

                data_frame.index = pandas.to_datetime(data_frame.index)

        if (freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):

        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        """ convert_csv_data_frame - Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """ clean_csv_file - Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
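A minimal usage sketch of the cache methods above, with TimeSeriesIO in scope and a small toy DataFrame (the filename is illustrative):

import datetime
import pandas

tsio = TimeSeriesIO()

# toy daily series
index = pandas.date_range(datetime.datetime(2015, 1, 1), periods=5, freq='B')
df = pandas.DataFrame(index=index, data={'EURUSD.close': [1.20, 1.21, 1.19, 1.22, 1.23]})

# write to HDF5 (creates/overwrites "my_cache.h5") and read it back
tsio.write_time_series_cache_to_disk('my_cache', df)
df_cached = tsio.read_time_series_cache_from_disk('my_cache')

print(df_cached.tail())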
for no in thread_no: for key in Constants.time_series_factory_thread_no: Constants.time_series_factory_thread_no[key] = no import time start = time.time() df = ltsf.harvest_time_series(time_series_request) end = time.time() duration = end - start diag.append("With " + str(no) + " " + tech + " no: " + str(duration) + " seconds") for d in diag: logger.info(d) ###### download intraday data from Bloomberg for FX, with different threading techniques if True: from datetime import timedelta time_series_request = TimeSeriesRequest( start_date=datetime.date.today() - timedelta(days=10), # start date finish_date=datetime.date.today(), # finish date freq='intraday', # intraday data data_source='bloomberg', # use Bloomberg as data source tickers=[ 'EURUSD', # ticker (Thalesians) 'GBPUSD',
class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        # note: "data_frame.index is []" is always False, so test emptiness explicitly
        if data_frame is None or len(data_frame.index) == 0:
            return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

        if data_frame is not None:
            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-') for x in returned_tickers]
            returned_fields = [x.replace('value', 'close') for x in returned_fields]    # special case for close

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0
        data_frame = None

        while (trials < 5):
            try:
                data_frame = Quandl.get(time_series_request.tickers,
                                        authtoken=Constants().quandl_api_key,
                                        trim_start=time_series_request.start_date,
                                        trim_end=time_series_request.finish_date)
                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempting... " + str(trials) + " request to download from Quandl")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
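download_daily retries a fixed five times with no pause between attempts. A sketch of the same idea with a short exponential pause, which tends to be gentler on rate-limited APIs; download_fn stands in for any callable, such as a lambda wrapping the Quandl.get call above:

import time

def download_with_retries(download_fn, max_trials=5, initial_wait=1.0):
    wait = initial_wait

    for trial in range(max_trials):
        try:
            return download_fn()
        except Exception:
            # pause before the next attempt, doubling the wait each time
            time.sleep(wait)
            wait = wait * 2

    return None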
class FXCrossFactory: def __init__(self): self.logger = LoggerManager().getLogger(__name__) self.fxconv = FXConv() if Constants().default_time_series_factory == 'lighttimeseriesfactory': self.time_series_factory = LightTimeSeriesFactory() else: self.time_series_factory = CachedTimeSeriesFactory() return def get_fx_cross_tick(self, start, end, cross, cut = "NYC", source = "gain", cache_algo='cache_algo_return', type = 'spot'): if isinstance(cross, str): cross = [cross] time_series_request = TimeSeriesRequest() time_series_factory = self.time_series_factory data_frame_agg = None time_series_request.gran_freq = "tick" # tick time_series_request.freq_mult = 1 # 1 min time_series_request.cut = cut # NYC/BGN ticker time_series_request.fields = ['bid', 'ask'] # bid/ask field only time_series_request.cache_algo = cache_algo # cache_algo_only, cache_algo_return, internet_load time_series_request.environment = 'backtest' time_series_request.start_date = start time_series_request.finish_date = end time_series_request.data_source = source time_series_request.category = 'fx' for cr in cross: if (type == 'spot'): time_series_request.tickers = cr cross_vals = time_series_factory.harvest_time_series(time_series_request) cross_vals.columns = [cr + '.bid', cr + '.ask'] if data_frame_agg is None: data_frame_agg = cross_vals else: data_frame_agg = data_frame_agg.join(cross_vals, how='outer') # strip the nan elements data_frame_agg = data_frame_agg.dropna() return data_frame_agg def get_fx_cross(self, start, end, cross, cut = "NYC", source = "bloomberg", freq = "intraday", cache_algo='cache_algo_return', type = 'spot'): if source == "gain" or source == 'dukascopy' or freq == 'tick': return self.get_fx_cross_tick(start, end, cross, cut = cut, source = source, cache_algo='cache_algo_return', type = 'spot') if isinstance(cross, str): cross = [cross] time_series_request = TimeSeriesRequest() time_series_factory = self.time_series_factory time_series_calcs = TimeSeriesCalcs() data_frame_agg = None if freq == 'intraday': time_series_request.gran_freq = "minute" # intraday elif freq == 'daily': time_series_request.gran_freq = "daily" # intraday time_series_request.freq_mult = 1 # 1 min time_series_request.cut = cut # NYC/BGN ticker time_series_request.fields = 'close' # close field only time_series_request.cache_algo = cache_algo # cache_algo_only, cache_algo_return, internet_load time_series_request.environment = 'backtest' time_series_request.start_date = start time_series_request.finish_date = end time_series_request.data_source = source for cr in cross: base = cr[0:3] terms = cr[3:6] if (type == 'spot'): # non-USD crosses if base != 'USD' and terms != 'USD': base_USD = self.fxconv.correct_notation('USD' + base) terms_USD = self.fxconv.correct_notation('USD' + terms) # TODO check if the cross exists in the database # download base USD cross time_series_request.tickers = base_USD time_series_request.category = self.fxconv.em_or_g10(base, freq) base_vals = time_series_factory.harvest_time_series(time_series_request) # download terms USD cross time_series_request.tickers = terms_USD time_series_request.category = self.fxconv.em_or_g10(terms, freq) terms_vals = time_series_factory.harvest_time_series(time_series_request) if (base_USD[0:3] == 'USD'): base_vals = 1 / base_vals if (terms_USD[0:3] == 'USD'): terms_vals = 1 / terms_vals base_vals.columns = ['temp'] terms_vals.columns = ['temp'] cross_vals = base_vals.div(terms_vals, axis = 'index') cross_vals.columns = [cr + '.close'] else: if base == 'USD': non_USD = 
terms if terms == 'USD': non_USD = base correct_cr = self.fxconv.correct_notation(cr) time_series_request.tickers = correct_cr time_series_request.category = self.fxconv.em_or_g10(non_USD, freq) cross_vals = time_series_factory.harvest_time_series(time_series_request) # flip if not convention if(correct_cr != cr): cross_vals = 1 / cross_vals cross_vals.columns.names = [cr + '.close'] elif type[0:3] == "tot": if freq == 'daily': # download base USD cross time_series_request.tickers = base + 'USD' time_series_request.category = self.fxconv.em_or_g10(base, freq) + '-tot' if type == "tot": base_vals = time_series_factory.harvest_time_series(time_series_request) else: x = 0 # download terms USD cross time_series_request.tickers = terms + 'USD' time_series_request.category = self.fxconv.em_or_g10(terms, freq) + '-tot' if type == "tot": terms_vals = time_series_factory.harvest_time_series(time_series_request) else: x = 0 base_rets = time_series_calcs.calculate_returns(base_vals) terms_rets = time_series_calcs.calculate_returns(terms_vals) cross_rets = base_rets.sub(terms_rets.iloc[:,0],axis=0) # first returns of a time series will by NaN, given we don't know previous point cross_rets.iloc[0] = 0 cross_vals = time_series_calcs.create_mult_index(cross_rets) cross_vals.columns = [cr + '-tot.close'] elif freq == 'intraday': self.logger.info('Total calculated returns for intraday not implemented yet') return None if data_frame_agg is None: data_frame_agg = cross_vals else: data_frame_agg = data_frame_agg.join(cross_vals, how='outer') # strip the nan elements data_frame_agg = data_frame_agg.dropna() return data_frame_agg
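The non-USD branch above synthesises a cross from two USD legs, inverting any leg quoted as USDxxx so that both are expressed as xxxUSD before dividing. A worked example with made-up spot levels:

# made-up quotes in market convention
eurusd = 1.10      # EURUSD: USD per 1 EUR
gbpusd = 1.50      # GBPUSD: USD per 1 GBP

# both legs are already xxxUSD, so EURGBP = EURUSD / GBPUSD
eurgbp = eurusd / gbpusd
print(round(eurgbp, 4))    # 0.7333

# a leg quoted as USDxxx (eg. USDJPY) is first inverted,
# mirroring the "1 / base_vals" logic in get_fx_cross
usdjpy = 120.0
jpyusd = 1 / usdjpy        # USD per 1 JPY
eurjpy = eurusd / jpyusd   # = eurusd * usdjpy
print(round(eurjpy, 1))    # 132.0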
class BBGLowLevelTick(BBGLowLevelTemplate): def __init__(self): super(BBGLowLevelTick, self).__init__() self.logger = LoggerManager().getLogger(__name__) # constants self.TICK_DATA = blpapi.Name("tickData") self.COND_CODE = blpapi.Name("conditionCodes") self.TICK_SIZE = blpapi.Name("size") self.TIME = blpapi.Name("time") self.TYPE = blpapi.Name("type") self.VALUE = blpapi.Name("value") self.RESPONSE_ERROR = blpapi.Name("responseError") self.CATEGORY = blpapi.Name("category") self.MESSAGE = blpapi.Name("message") self.SESSION_TERMINATED = blpapi.Name("SessionTerminated") def combine_slices(self, data_frame, data_frame_slice): return data_frame.append(data_frame_slice) # populate options for Bloomberg request for asset intraday request def fill_options(self, time_series_request): self._options = OptionsBBG() self._options.security = time_series_request.tickers[0] # get 1st ticker only! self._options.event = time_series_request.trade_side.upper() # self._options.barInterval = time_series_request.freq_mult self._options.startDateTime = time_series_request.start_date self._options.endDateTime = time_series_request.finish_date # self._options.gapFillInitialBar = False if hasattr(self._options.startDateTime, "microsecond"): self._options.startDateTime = self._options.startDateTime.replace(microsecond=0) if hasattr(self._options.endDateTime, "microsecond"): self._options.endDateTime = self._options.endDateTime.replace(microsecond=0) return self._options # iterate through Bloomberg output creating a DataFrame output # implements abstract method def process_message(self, msg): data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA) self.logger.info("Processing tick data for " + str(self._options.security)) tuple = [] data_vals = data.values() # for item in list(data_vals): # if item.hasElement(self.COND_CODE): # cc = item.getElementAsString(self.COND_CODE) # else: # cc = "" # # # each price time point has multiple fields - marginally quicker # tuple.append(([item.getElementAsFloat(self.VALUE), # item.getElementAsInteger(self.TICK_SIZE)], # item.getElementAsDatetime(self.TIME))) # slightly faster this way (note, we are skipping trade & CC fields) tuple = [ ( [item.getElementAsFloat(self.VALUE), item.getElementAsInteger(self.TICK_SIZE)], item.getElementAsDatetime(self.TIME), ) for item in data_vals ] data_table = list(map(itemgetter(0), tuple)) time_list = list(map(itemgetter(1), tuple)) try: self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1])) except: self.logger.info("No dates retrieved") return None # create pandas dataframe with the Bloomberg output return pandas.DataFrame(data=data_table, index=time_list, columns=["close", "ticksize"]) # implement abstract method: create request for data def send_bar_request(self, session, eventQueue): refDataService = session.getService("//blp/refdata") request = refDataService.createRequest("IntradayTickRequest") # only one security/eventType per request request.set("security", self._options.security) request.getElement("eventTypes").appendValue("TRADE") # request.set("eventTypes", self._options.event) request.set("includeConditionCodes", True) # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT') if self._options.startDateTime and self._options.endDateTime: request.set("startDateTime", self._options.startDateTime) request.set("endDateTime", self._options.endDateTime) self.logger.info("Sending Tick Bloomberg Request...") session.sendRequest(request)
class HistoricalDataRequest(Request): def __init__(self, symbols, fields, start=None, end=None, period='DAILY', addtl_sets=None, ignore_security_error=0, ignore_field_error=0): """ Historical data request for bbg. Parameters ---------- symbols : string or list fields : string or list start : start date (if None then use 1 year ago) end : end date (if None then use today) period : ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY') ignore_field_errors : bool ignore_security_errors : bool """ Request.__init__(self, ignore_security_error=ignore_security_error, ignore_field_error=ignore_field_error) assert period in ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY') self.symbols = isinstance(symbols, str) and [symbols] or symbols self.fields = isinstance(fields, str) and [fields] or fields if start is None: start = datetime.today() - timedelta( 365) # by default download the past year if end is None: end = datetime.today() self.start = to_datetime(start) self.end = to_datetime(end) self.period = period self.logger = LoggerManager().getLogger(__name__) # response related self.response = {} def get_bbg_service_name(self): return '//blp/refdata' def get_bbg_request(self, svc, session): # create the bbg request object request = svc.CreateRequest('HistoricalDataRequest') [ request.GetElement('securities').AppendValue(sec) for sec in self.symbols ] [request.GetElement('fields').AppendValue(fld) for fld in self.fields] request.Set('startDate', self.start.strftime('%Y%m%d')) request.Set('endDate', self.end.strftime('%Y%m%d')) request.Set('periodicitySelection', self.period) o = request.GetElement('overrides').AppendElment() o.SetElement('fieldId', 'TIME_ZONE_OVERRIDE') o.SetElement('value', 'GMT') return request def on_security_data_node(self, node): """ process a securityData node - FIXME: currently not handling relateDate node """ sid = XmlHelper.get_child_value(node, 'security') farr = node.GetElement('fieldData') dmap = defaultdict(list) self.logger.info("Fetching ticker " + sid) for i in range(farr.NumValues): pt = farr.GetValue(i) [ dmap[f].append(XmlHelper.get_child_value(pt, f)) for f in ['date'] + self.fields ] self.logger.info("Returning ticker " + sid) idx = dmap.pop('date') frame = DataFrame(dmap, columns=self.fields, index=idx) frame.index.name = 'date' self.response[sid] = frame def on_event(self, evt, is_final): """ on_event - This is invoked from in response to COM PumpWaitingMessages - different thread """ for msg in XmlHelper.message_iter(evt): # Single security element in historical request node = msg.GetElement('securityData') if node.HasElement('securityError'): self.security_errors.append( XmlHelper.as_security_error( node.GetElement('securityError'))) else: self.on_security_data_node(node) def response_as_single(self, copy=0): """ response_as_single - convert the response map to a single data frame with Multi-Index columns """ arr = [] for sid, frame in self.response.items(): if copy: frame = frame.copy() 'security' not in frame and frame.insert(0, 'security', sid) arr.append(frame.reset_index().set_index(['date', 'security'])) # time.sleep(1000) if (arr == []): return arr return concat(arr).unstack() def response_as_panel(self, swap=False): panel = Panel(self.response) if swap: panel = panel.swapaxes('items', 'minor') return panel
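response_as_single stitches the per-security frames into one frame with (date, security) rows, then unstacks security into the columns. A toy illustration of the same concat/unstack step, with made-up data:

import pandas
from pandas import DataFrame, concat

# two per-security frames, as they might sit in self.response
idx = pandas.to_datetime(['2015-01-01', '2015-01-02'])
a = DataFrame({'PX_LAST': [100.0, 101.0]}, index=idx)
b = DataFrame({'PX_LAST': [50.0, 51.0]}, index=idx)

arr = []
for sid, frame in {'AAA': a, 'BBB': b}.items():
    frame = frame.copy()
    frame.insert(0, 'security', sid)
    frame.index.name = 'date'
    arr.append(frame.reset_index().set_index(['date', 'security']))

# unstacking moves security into a (field, security) column MultiIndex
single = concat(arr).unstack()
print(single)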
class BBGLowLevelDaily(BBGLowLevelTemplate): def __init__(self): super(BBGLowLevelDaily, self).__init__() self.logger = LoggerManager().getLogger(__name__) self._options = [] def combine_slices(self, data_frame, data_frame_slice): if (data_frame_slice.columns.get_level_values(1).values[0] not in data_frame.columns.get_level_values(1).values): return data_frame.join(data_frame_slice, how="outer") return data_frame # populate options for Bloomberg request for asset daily request def fill_options(self, time_series_request): self._options = OptionsBBG() self._options.security = time_series_request.tickers self._options.startDateTime = time_series_request.start_date self._options.endDateTime = time_series_request.finish_date self._options.fields = time_series_request.fields return self._options def process_message(self, msg): # Process received events ticker = msg.getElement('securityData').getElement( 'security').getValue() fieldData = msg.getElement('securityData').getElement('fieldData') # SLOW loop (careful, not all the fields will be returned every time # hence need to include the field name in the tuple) data = defaultdict(dict) for i in range(fieldData.numValues()): for j in range(1, fieldData.getValue(i).numElements()): data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \ = fieldData.getValue(i).getElement(j).getValue() data_frame = pandas.DataFrame(data) # if obsolete ticker could return no values if (not (data_frame.empty)): # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker']) data_frame.index = pandas.to_datetime(data_frame.index) self.logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1])) else: return None return data_frame # create request for data def send_bar_request(self, session, eventQueue): refDataService = session.getService("//blp/refdata") request = refDataService.createRequest("HistoricalDataRequest") request.set("startDate", self._options.startDateTime.strftime('%Y%m%d')) request.set("endDate", self._options.endDateTime.strftime('%Y%m%d')) # # only one security/eventType per request for field in self._options.fields: request.getElement("fields").appendValue(field) for security in self._options.security: request.getElement("securities").appendValue(security) self.logger.info("Sending Bloomberg Daily Request:" + str(request)) session.sendRequest(request)
class LoaderPandasWeb(LoaderTemplate):

    def __init__(self):
        super(LoaderPandasWeb, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Pandas Web data")

        data_frame = self.download_daily(time_series_request_vendor)

        if time_series_request_vendor.data_source == "fred":
            returned_fields = ["close" for x in data_frame.columns.values]
            returned_tickers = data_frame.columns.values
        else:
            data_frame = data_frame.to_frame().unstack()

            # print(data_frame.tail())

            # note: "data_frame.index is []" is always False, so test emptiness explicitly
            if len(data_frame.index) == 0:
                return None

            # convert from vendor to Thalesians tickers/fields
            if data_frame is not None:
                returned_fields = data_frame.columns.get_level_values(0)
                returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            ticker_requested = []

            for f in time_series_request.fields:
                for t in time_series_request.tickers:
                    ticker_requested.append(t + "." + f)

            data_frame.columns = ticker_combined
            data_frame.index.name = "Date"

            # only return the requested tickers
            data_frame = pandas.DataFrame(data=data_frame[ticker_requested],
                                          index=data_frame.index, columns=ticker_requested)

        self.logger.info("Completed request from Pandas Web.")

        return data_frame

    def download_daily(self, time_series_request):
        return web.DataReader(time_series_request.tickers,
                              time_series_request.data_source,
                              time_series_request.start_date,
                              time_series_request.finish_date)
class BBGLowLevelIntraday(BBGLowLevelTemplate): def __init__(self): super(BBGLowLevelIntraday, self).__init__() self.logger = LoggerManager().getLogger(__name__) # constants self.BAR_DATA = blpapi.Name("barData") self.BAR_TICK_DATA = blpapi.Name("barTickData") self.OPEN = blpapi.Name("open") self.HIGH = blpapi.Name("high") self.LOW = blpapi.Name("low") self.CLOSE = blpapi.Name("close") self.VOLUME = blpapi.Name("volume") self.NUM_EVENTS = blpapi.Name("numEvents") self.TIME = blpapi.Name("time") def combine_slices(self, data_frame, data_frame_slice): return data_frame.append(data_frame_slice) # populate options for Bloomberg request for asset intraday request def fill_options(self, time_series_request): self._options = OptionsBBG() self._options.security = time_series_request.tickers[0] # get 1st ticker only! self._options.event = "TRADE" self._options.barInterval = time_series_request.freq_mult self._options.startDateTime = time_series_request.start_date self._options.endDateTime = time_series_request.finish_date self._options.gapFillInitialBar = False if hasattr(self._options.startDateTime, 'microsecond'): self._options.startDateTime = self._options.startDateTime.replace(microsecond=0) if hasattr(self._options.endDateTime, 'microsecond'): self._options.endDateTime = self._options.endDateTime.replace(microsecond=0) return self._options # iterate through Bloomberg output creating a DataFrame output # implements abstract method def process_message(self, msg): data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA) self.logger.info("Processing intraday data for " + str(self._options.security)) data_vals = list(data.values()) # data_matrix = numpy.zeros([len(data_vals), 6]) # data_matrix.fill(numpy.nan) # # date_index = [None] * len(data_vals) # # for i in range(0, len(data_vals)): # data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN) # data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH) # data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW) # data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE) # data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME) # data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS) # # date_index[i] = data_vals[i].getElementAsDatetime(self.TIME) # # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1])) # # # create pandas dataframe with the Bloomberg output # return pandas.DataFrame(data = data_matrix, index = date_index, # columns=['open', 'high', 'low', 'close', 'volume', 'events']) ## for loop method is touch slower # time_list = [] # data_table = [] # for bar in data_vals: # data_table.append([bar.getElementAsFloat(self.OPEN), # bar.getElementAsFloat(self.HIGH), # bar.getElementAsFloat(self.LOW), # bar.getElementAsFloat(self.CLOSE), # bar.getElementAsInteger(self.VOLUME), # bar.getElementAsInteger(self.NUM_EVENTS)]) # # time_list.append(bar.getElementAsDatetime(self.TIME)) # each price time point has multiple fields - marginally quicker tuple = [([bar.getElementAsFloat(self.OPEN), bar.getElementAsFloat(self.HIGH), bar.getElementAsFloat(self.LOW), bar.getElementAsFloat(self.CLOSE), bar.getElementAsInteger(self.VOLUME), bar.getElementAsInteger(self.NUM_EVENTS)], bar.getElementAsDatetime(self.TIME)) for bar in data_vals] data_table = list(map(itemgetter(0), tuple)) time_list = list(map(itemgetter(1), tuple)) try: self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1])) except: self.logger.info("No dates retrieved") return None # 
create pandas dataframe with the Bloomberg output return pandas.DataFrame(data = data_table, index = time_list, columns=['open', 'high', 'low', 'close', 'volume', 'events']) # implement abstract method: create request for data def send_bar_request(self, session, eventQueue): refDataService = session.getService("//blp/refdata") request = refDataService.createRequest("IntradayBarRequest") # only one security/eventType per request request.set("security", self._options.security) request.set("eventType", self._options.event) request.set("interval", self._options.barInterval) # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT') if self._options.startDateTime and self._options.endDateTime: request.set("startDateTime", self._options.startDateTime) request.set("endDateTime", self._options.endDateTime) if self._options.gapFillInitialBar: request.set("gapFillInitialBar", True) # scalar element, so set (not append) self.logger.info("Sending Intraday Bloomberg Request...") session.sendRequest(request)
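Both the tick and intraday handlers build a list of ([values], timestamp) pairs and then split it with itemgetter. The same unzip step on toy data:

import datetime
from operator import itemgetter

pairs = [([1.10, 1.11, 1.09, 1.10, 500, 42], datetime.datetime(2015, 1, 1, 9, 0)),
         ([1.10, 1.12, 1.10, 1.11, 600, 55], datetime.datetime(2015, 1, 1, 9, 1))]

data_table = list(map(itemgetter(0), pairs))   # the OHLC/volume/events rows
time_list = list(map(itemgetter(1), pairs))    # the bar timestamps

print(data_table[0], time_list[0])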
class LoaderDukasCopy(LoaderTemplate):
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        # note: call super with this class (not the parent), otherwise LoaderTemplate's
        # own initialiser is skipped
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """ load_ticker - Retrieves market data from external data source (in this case Dukascopy)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly',
                                         'intraday', 'minute', 'hourly']):
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            # time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.get_tick(time_series_request, time_series_request_vendor)

            # tz_localize returns a new object, so assign the result
            if data_frame is not None:
                data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        return

    def get_tick(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]] * (len(returned_fields))

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):
        symbol = time_series_request.tickers[0]
        df_list = []

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [self.fetch_file(time, symbol) for time in
                   self.hour_range(time_series_request.start_date, time_series_request.finish_date)]

        # parallel (has pickle issues)
        # time_list = self.hour_range(time_series_request.start_date, time_series_request.finish_date)
        # df_list = Parallel(n_jobs=-1)(delayed(self.fetch_file)(time, symbol) for time in time_list)

        try:
            return pandas.concat(df_list)
        except Exception:
            return None

    def fetch_file(self, time, symbol):
        if time.hour % 24 == 0:
            self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(symbol=symbol,
                                          year=str(time.year).rjust(4, '0'),
                                          month=str(time.month).rjust(2, '0'),
                                          day=str(time.day).rjust(2, '0'),
                                          hour=str(time.hour).rjust(2, '0'))

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants().temp_pythalesians_folder + "/dkticks/" + tick_path

            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            return None

    def fetch_tick(self, tick_url):
        i = 0
        tick_request = None

        # try up to 5 times to download
        while i < 5:
            try:
                tick_request = requests.get(tick_url)
                i = 5
            except Exception:
                i = i + 1

        if (tick_request is None):
            # log via logger.error (the original mistakenly called the logger object itself)
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        data_file = open(out_path, "wb+")
        data_file.write(content)
        data_file.close()

    def chunks(self, lst, n):
        if n < 1:
            n = 1

        return [lst[i:i + n] for i in range(0, len(lst), n)]

    def retrieve_df(self, data, symbol, epoch):
        date, tuple_list = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(data, epoch)

        df = pandas.DataFrame(data=tuple_list, columns=['temp', 'bid', 'ask', 'bidv', 'askv'], index=date)
        df = df.drop('temp', axis=1)    # drop returns a new DataFrame, so assign the result
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have a different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without a decimal point
        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        delta_t = end_date - start_date
        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)

        for n in range(int(delta_hours)):
            yield start_date + timedelta(0, 0, 0, 0, 0, n)    # hours

    def get_daily_data(self):
        pass
class TradeAnalysis:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.scale_factor = 3

        return

    def run_strategy_returns_stats(self, strategy):
        """ run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        strategy : StrategyTemplate
            defining trading strategy
        """

        pnl = strategy.get_strategy_pnl()
        tz = TimeSeriesTimezone()
        tsc = TimeSeriesCalcs()

        # PyFolio assumes UTC time based DataFrames (so force this localisation)
        try:
            pnl = tz.localise_index_as_UTC(pnl)
        except:
            pass

        # set the matplotlib style sheet & defaults
        try:
            matplotlib.rcdefaults()
            plt.style.use(Constants().plotfactory_pythalesians_style_sheet['pythalesians'])
        except:
            pass

        # TODO for intraday strategies, make daily

        # convert DataFrame (assumed to have only one column) to Series
        pnl = tsc.calculate_returns(pnl)
        pnl = pnl[pnl.columns[0]]

        fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

        try:
            plt.savefig(strategy.DUMP_PATH + "stats.png")
        except:
            pass

        plt.show()

    def run_tc_shock(self, strategy, tc=None):
        if tc is None:
            tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp': x} for x in tc]
        pretty_portfolio_names = [str(x) + 'bp' for x in tc]    # names of the portfolio
        parameter_type = 'TC analysis'                          # broad type of parameter name

        return self.run_arbitrary_sensitivity(strategy,
                                              parameter_list=parameter_list,
                                              pretty_portfolio_names=pretty_portfolio_names,
                                              parameter_type=parameter_type)

    ###### Parameters and signal generations (need to be customised for every model)
    def run_arbitrary_sensitivity(self, strat, parameter_list=None, parameter_names=None,
                                  pretty_portfolio_names=None, parameter_type=None):

        asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()

        port_list = None
        tsd_list = []

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.br = br   # for calculating signals

            signal_df = strat.construct_signal(spot_df, spot_df2, br.tech_params, br)

            cash_backtest = CashBacktest()
            self.logger.info("Calculating... " + pretty_portfolio_names[i])

            cash_backtest.calculate_trading_PnL(br, asset_df, signal_df)
            tsd_list.append(cash_backtest.get_portfolio_pnl_tsd())
            stats = str(cash_backtest.get_portfolio_pnl_desc()[0])

            port = cash_backtest.get_cumportfolio().resample('B').mean()
            port.columns = [pretty_portfolio_names[i] + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()

        pf = PlotFactory()
        gp = GraphProperties()

        ir = [t.inforatio()[0] for t in tsd_list]

        # gp.color = 'Blues'

        # plot all the variations
        gp.resample = 'B'
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + '.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type

        pf.plot_line_graph(port_list, adapter='pythalesians', gp=gp)

        # plot all the IR in a bar chart form (can be easier to read!)
        gp = GraphProperties()
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type

        summary = pandas.DataFrame(index=pretty_portfolio_names, data=ir, columns=['IR'])

        pf.plot_bar_graph(summary, adapter='pythalesians', gp=gp)

        return port_list

    ###### Parameters and signal generations (need to be customised for every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self, strat, parameter_list=None,
                                             pretty_portfolio_names=None, strip=None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = strat.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])

            strat.br = br
            strat.construct_strategy(br=br)

            strat.plot_strategy_pnl()
            strat.plot_strategy_leverage()
            strat.plot_strategy_group_benchmark_pnl(strip=strip)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()
        strat.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, strat):
        from pythalesians.economics.seasonality.seasonality import Seasonality
        from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

        tsc = TimeSeriesCalcs()
        seas = Seasonality()
        strat.construct_strategy()
        pnl = strat.get_strategy_pnl()

        # get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = tsc.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average=True)

        # get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = tsc.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        gp = GraphProperties()
        pf = PlotFactory()

        # plotting spot over day of month/month of year
        gp.color = 'Blues'
        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality day of month.png'
        gp.title = strat.FINAL_STRATEGY + ' day of month seasonality'
        gp.display_legend = False
        gp.color_2_series = [bus_day.columns[-1]]
        gp.color_2 = ['red']    # red, pink
        gp.linewidth_2 = 4
        gp.linewidth_2_series = [bus_day.columns[-1]]
        gp.y_axis_2_series = [bus_day.columns[-1]]

        pf.plot_line_graph(bus_day, adapter='pythalesians', gp=gp)

        gp = GraphProperties()
        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality month of year.png'
        gp.title = strat.FINAL_STRATEGY + ' month of year seasonality'

        pf.plot_line_graph(month, adapter='pythalesians', gp=gp)

        return month
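A short usage sketch for the transaction-cost shock above, assuming strategy is an already-constructed StrategyTemplate subclass such as the FX CTA example later in this document:

ta = TradeAnalysis()

# rerun the backtest with spot transaction costs from 0bp to 2bp (the
# default grid), plotting cumulative P&L and the IR per cost level
port_list = ta.run_tc_shock(strategy)

# or with an explicit grid of costs in basis points
port_list = ta.run_tc_shock(strategy, tc=[0, 0.5, 1.0])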
class StrategyFXCTA_Example(StrategyTemplate):

    def __init__(self):
        super(StrategyFXCTA_Example, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        ##### FILL IN WITH YOUR OWN PARAMETERS FOR display, dumping, TSF etc.
        self.tsfactory = LightTimeSeriesFactory()
        self.DUMP_CSV = 'output_data/'
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.FINAL_STRATEGY = 'Thalesians FX CTA'
        self.SCALE_FACTOR = 3

        return

    ###### Parameters and signal generation (need to be customised for every model)
    def fill_backtest_request(self):
        ##### FILL IN WITH YOUR OWN BACKTESTING PARAMETERS
        br = BacktestRequest()

        # get all asset data
        br.start_date = "04 Jan 1989"
        br.finish_date = datetime.datetime.utcnow()
        br.spot_tc_bp = 0.5
        br.ann_factor = 252

        br.plot_start = "01 Apr 2015"
        br.calc_stats = True
        br.write_csv = False
        br.plot_interim = True
        br.include_benchmark = True

        # have vol target for each signal
        br.signal_vol_adjust = True
        br.signal_vol_target = 0.1
        br.signal_vol_max_leverage = 5
        br.signal_vol_periods = 20
        br.signal_vol_obs_in_year = 252
        br.signal_vol_rebalance_freq = 'BM'
        br.signal_vol_resample_freq = None

        # have vol target for portfolio
        br.portfolio_vol_adjust = True
        br.portfolio_vol_target = 0.1
        br.portfolio_vol_max_leverage = 5
        br.portfolio_vol_periods = 20
        br.portfolio_vol_obs_in_year = 252
        br.portfolio_vol_rebalance_freq = 'BM'
        br.portfolio_vol_resample_freq = None

        # tech params
        br.tech_params.sma_period = 200

        return br

    def fill_assets(self):
        ##### FILL IN WITH YOUR ASSET DATA

        # for FX basket
        full_bkt = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                    'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['Thalesians FX CTA'] = full_bkt

        br = self.fill_backtest_request()

        self.logger.info("Loading asset data...")

        vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                          'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

        time_series_request = TimeSeriesRequest(
            start_date=br.start_date,               # start date
            finish_date=br.finish_date,             # finish date
            freq='daily',                           # daily data
            data_source='quandl',                   # use Quandl as data source
            tickers=full_bkt,                       # ticker (Thalesians)
            fields=['close'],                       # which fields to download
            vendor_tickers=vendor_tickers,          # ticker (Quandl)
            vendor_fields=['close'],                # which Quandl fields to download
            cache_algo='internet_load_return')      # how to return data

        asset_df = self.tsfactory.harvest_time_series(time_series_request)

        # signalling variables
        spot_df = asset_df
        spot_df2 = None

        return asset_df, spot_df, spot_df2, basket_dict

    def construct_signal(self, spot_df, spot_df2, tech_params, br):
        ##### FILL IN WITH YOUR OWN SIGNALS

        # use technical indicator to create signals
        # (we could obviously create whatever function we wanted for generating the signal dataframe)
        tech_ind = TechIndicator()

        tech_ind.create_tech_ind(spot_df, 'SMA', tech_params)
        signal_df = tech_ind.get_signal()

        return signal_df

    def construct_strategy_benchmark(self):
        ###### FILL IN WITH YOUR OWN BENCHMARK

        tsr_indices = TimeSeriesRequest(
            start_date='01 Jan 1980',                   # start date
            finish_date=datetime.datetime.utcnow(),     # finish date
            freq='daily',                               # daily data
            data_source='quandl',                       # use Quandl as data source
            tickers=["EURUSD"],                         # tickers to download
            vendor_tickers=['FRED/DEXUSEU'],            # ticker (Quandl)
            fields=['close'],                           # which fields to download
            vendor_fields=['close'],
            cache_algo='cache_algo_return')             # how to return data

        df = self.tsfactory.harvest_time_series(tsr_indices)
        df.columns = [x.split(".")[0] for x in df.columns]

        return df
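The class above only fills in parameters, assets, signals and a benchmark; the backtest itself is driven by methods inherited from StrategyTemplate, which are not shown in this file. A hedged sketch of how such a strategy might be run, where the construct_strategy and plot_strategy_pnl method names are assumptions about StrategyTemplate rather than something this file confirms:

# hypothetical driver (StrategyTemplate method names are assumptions)
if __name__ == '__main__':
    strategy = StrategyFXCTA_Example()
    strategy.construct_strategy()       # assumed: loads assets, builds signals, runs the backtest
    strategy.plot_strategy_pnl()        # assumed: plots cumulative P&L of FINAL_STRATEGY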
class LoaderBBG(LoaderTemplate):

    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # event times/dates need a separate ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(time_series_request_vendor, time_series_request)

                # remove the field 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index('release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop=False)

                    data_frame = pandas.concat([events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarterly data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.download_intraday(time_series_request_vendor)

            cols = data_frame.columns.values
            # tz_localize returns a new DataFrame, so assign the result
            data_frame = data_frame.tz_localize('UTC')
            cols = time_series_request.tickers[0] + "." + cols
            data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor, time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " + time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # NB: convert_objects is deprecated in later pandas versions
            # (pandas.to_datetime / pandas.to_numeric are the modern equivalents)
            data_frame = data_frame.convert_objects(convert_dates='coerce', convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
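LoaderBBG leaves the actual wire calls abstract, so a concrete subclass must implement kill_session, download_intraday, download_daily and download_ref. A minimal skeleton follows; the class name is hypothetical and the bodies are placeholders, not a real Bloomberg API implementation:

# hypothetical skeleton of a concrete Bloomberg loader (placeholder bodies only)
class LoaderBBGExample(LoaderBBG):

    def __init__(self):
        super(LoaderBBGExample, self).__init__()

    def kill_session(self):
        # close any open Bloomberg session here
        return

    def download_intraday(self, time_series_request):
        # issue an intraday request via the Bloomberg API; load_ticker expects
        # a DataFrame with one column per field for a single ticker
        return None

    def download_daily(self, time_series_request):
        # issue a historical request via the Bloomberg API; get_daily_data expects
        # a DataFrame with (field, ticker) MultiIndex columns
        return None

    def download_ref(self, time_series_request):
        # issue a reference data request via the Bloomberg API; get_reference_data
        # expects a DataFrame with (field, ticker) MultiIndex columns
        return None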