def convert_csv_data_frame(self, f_name, category, freq, cutoff=None,
                           dateparse=None):
    """Reads a CSV file and writes it back out as an HDF5 cache file.

    Parameters
    ----------
    f_name : str
        File name to be read
    category : str
        Data category of file (used in HDF5 filename)
    freq : str
        Intraday/daily frequency (used in HDF5 filename)
    cutoff : DateTime (optional)
        Filter dates up to here
    dateparse : str
        Date parser to use
    """
    logger = LoggerManager().getLogger(__name__)

    logger.info("About to read... " + f_name)

    data_frame = self.read_csv_data_frame(
        f_name, freq, cutoff=cutoff, dateparse=dateparse)

    # Cache filename is derived from the data category
    self.write_time_series_cache_to_disk(
        self.create_cache_file_name(category), data_frame)
def __init__(self, engine = ChartConstants().chartfactory_default_engine):
    """Sets up logging, the dated output path and the default chart engine."""
    self.logger = LoggerManager().getLogger(__name__)

    # Output files are grouped under a folder stamped with today's date
    today_str = datetime.date.today().strftime("%Y%m%d")
    self.DUMP_PATH = 'output_data/' + today_str + ' '

    self.DEFAULT_PLOT_ENGINE = engine
    self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE)
def send_bar_request(self, session, eventQueue, options, cid):
    """Builds and dispatches a Bloomberg ReferenceDataRequest.

    Parameters
    ----------
    session : blpapi Session
        Active Bloomberg session
    eventQueue : blpapi EventQueue
        Unused here; kept for interface symmetry with other senders
    options : request options
        Carries fields, securities, start/end datetimes and overrides
    cid : blpapi CorrelationId
        Correlation id used to match the response
    """
    logger = LoggerManager().getLogger(__name__)

    request = session.getService("//blp/refdata").createRequest(
        'ReferenceDataRequest')

    # Force GMT time and make sure expired contracts are included
    self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)
    self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS', "Y")

    self.add_override(request, 'START_DT',
                      options.startDateTime.strftime('%Y%m%d'))
    self.add_override(request, 'END_DT',
                      options.endDateTime.strftime('%Y%m%d'))

    # Only one security/eventType per request
    fields_element = request.getElement("fields")

    for field in options.fields:
        fields_element.appendValue(field)

    securities_element = request.getElement("securities")

    for security in options.security:
        securities_element.appendValue(security)

    # Add user defined overrides for BBG request
    self.add_override_dict(request, options)

    logger.info("Sending Bloomberg Ref Request:" + str(request))
    session.sendRequest(request=request, correlationId=cid)
def instrument(self, instrument):
    """Sets the trading instrument, warning when it is not recognised.

    Parameters
    ----------
    instrument : str
        Trading instrument, expected to be one of 'spot', 'futures',
        'options' (other values are stored but trigger a warning)
    """
    valid_instrument = ['spot', 'futures', 'options']

    if instrument not in valid_instrument:
        # '+' (not '&') for string concatenation: the original '&' raised
        # TypeError whenever an unrecognised instrument was passed
        LoggerManager().getLogger(__name__).warning(
            str(instrument) + " is not a defined trading instrument.")

    self.__instrument = instrument
def process_message(self, msg):
    """Converts a Bloomberg intraday bar message into a pandas DataFrame.

    Parameters
    ----------
    msg : blpapi Message
        Message whose BAR_DATA/BAR_TICK_DATA element holds the bars

    Returns
    -------
    DataFrame or None
        OHLC, volume and event count indexed by bar timestamp, or None
        when the message contained no bars
    """
    data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

    logger = LoggerManager().getLogger(__name__)

    data_vals = list(data.values())

    # Each price time point has multiple fields - marginally quicker.
    # Renamed from 'tuple', which shadowed the builtin of the same name.
    bar_tuples = [([bar.getElementAsFloat(self.OPEN),
                    bar.getElementAsFloat(self.HIGH),
                    bar.getElementAsFloat(self.LOW),
                    bar.getElementAsFloat(self.CLOSE),
                    bar.getElementAsInteger(self.VOLUME),
                    bar.getElementAsInteger(self.NUM_EVENTS)],
                   bar.getElementAsDatetime(self.TIME))
                  for bar in data_vals]

    data_table = list(map(itemgetter(0), bar_tuples))
    time_list = list(map(itemgetter(1), bar_tuples))

    # Explicit emptiness check instead of the original bare
    # try/except IndexError-style control flow
    if not time_list:
        logger.info("No dates retrieved")
        return None

    logger.info("Dates between " + str(time_list[0]) + " - "
                + str(time_list[-1]))

    # Create pandas dataframe with the Bloomberg output
    return pd.DataFrame(data=data_table, index=time_list,
                        columns=["open", "high", "low", "close",
                                 "volume", "events"])
def asset(self, asset):
    """Sets the asset class, warning when it is not recognised.

    Parameters
    ----------
    asset : str
        Asset class, expected to be 'fx' or 'multi-asset' (other values
        are stored but trigger a warning)
    """
    valid_asset = ['fx', 'multi-asset']

    if asset not in valid_asset:
        # '+' (not '&') for string concatenation: the original '&' raised
        # TypeError whenever an unrecognised asset was passed
        LoggerManager().getLogger(__name__).warning(
            str(asset) + " is not a defined asset.")

    self.__asset = asset
def send_bar_request(self, session, eventQueue, options, cid):
    """Builds and dispatches a Bloomberg IntradayBarRequest.

    Parameters
    ----------
    session : blpapi Session
        Active Bloomberg session
    eventQueue : blpapi EventQueue
        Unused here; kept for interface symmetry with other senders
    options : request options
        Carries security, event type, bar interval, window and overrides
    cid : blpapi CorrelationId
        Correlation id used to match the response
    """
    logger = LoggerManager().getLogger(__name__)

    refDataService = session.getService("//blp/refdata")
    request = refDataService.createRequest("IntradayBarRequest")

    # Only one security/eventType per request
    request.set("security", options.security)
    request.set("eventType", options.event)
    request.set("interval", options.barInterval)

    # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

    has_window = (options.startDateTime is not None
                  and options.endDateTime is not None)

    if has_window:
        request.set("startDateTime", options.startDateTime)
        request.set("endDateTime", options.endDateTime)

    if options.gapFillInitialBar:
        request.append("gapFillInitialBar", True)

    # Add user defined overrides for BBG request
    self.add_override_dict(request, options)

    logger.info("Sending Intraday Bloomberg Request...")
    session.sendRequest(request=request, correlationId=cid)
def process_response_event(self, event):
    """Collects the DataFrame slices from every message in a response event.

    Parameters
    ----------
    event : blpapi Event
        Response event to iterate over

    Returns
    -------
    DataFrame or None
        Concatenation of all message slices, or None when nothing usable
        was returned (errors are logged and skipped)
    """
    data_frame_list = []

    logger = LoggerManager().getLogger(__name__)

    for msg in event:
        # Generates a lot of output - so don't use unless for
        # debugging purposes
        # logger.info(msg)

        if msg.hasElement(self.RESPONSE_ERROR):
            logger.error("REQUEST FAILED: "
                         + str(msg.getElement(self.RESPONSE_ERROR)))
            continue

        data_frame_slice = self.process_message(msg)

        if data_frame_slice is not None:
            data_frame_list.append(data_frame_slice)

    if not data_frame_list:
        # logger.warn is deprecated in favour of logger.warning
        logger.warning("No elements for ticker.")
        return None

    return pd.concat(data_frame_list)
def __init__(self, data_source=None,
             start_date='year',
             finish_date=None,
             tickers=None, category=None,
             freq_mult=1, freq="daily",
             gran_freq=None, cut="NYC",
             fields=None,
             cache_algo="internet_load_return",
             vendor_tickers=None, vendor_fields=None,
             environment="backtest",
             trade_side='trade'):
    """Describes a market data request (source, tickers, dates, fields etc.)

    Parameters mirror the attributes set below; `finish_date` defaults to
    "now" and `fields` to ['close'].
    """
    # Resolve time-dependent/mutable defaults at call time: the original
    # `finish_date=datetime.datetime.utcnow()` was evaluated once at
    # import time (so it went stale), and `fields=['close']` was a
    # mutable default shared across instances.
    if finish_date is None:
        finish_date = datetime.datetime.utcnow()

    if fields is None:
        fields = ['close']

    self.logger = LoggerManager().getLogger(__name__)

    # Define frequency of data (removed the dead `self.freq_mult = 1`
    # store which was immediately overwritten below)
    self.gran_freq = gran_freq
    self.freq_mult = freq_mult
    self.freq = freq

    # Data source, start and finish dates
    self.data_source = data_source
    self.start_date = start_date
    self.finish_date = finish_date

    self.tickers = tickers
    self.category = category                # special predefined categories
    self.cut = cut                          # closing time of the data (eg. NYC, LDN, TOK etc)
    self.fields = fields                    # fields, eg. close, high, low, open
    self.cache_algo = cache_algo            # internet_load_return (cache_algo_return is for future use)
    self.vendor_tickers = vendor_tickers    # define vendor tickers
    self.vendor_fields = vendor_fields      # define vendor fields
    self.environment = environment          # backtest environment only supported at present
    self.trade_side = trade_side
def send_bar_request(self, session, eventQueue, options, cid):
    """Builds and dispatches a Bloomberg ReferenceDataRequest, applying
    any user overrides (translated to Bloomberg names where a mapping
    exists).

    Parameters
    ----------
    session : blpapi Session
        Active Bloomberg session
    eventQueue : blpapi EventQueue
        Unused here; kept for interface symmetry with other senders
    options : request options
        Carries fields, securities, start/end datetimes and overrides
    cid : blpapi CorrelationId
        Correlation id used to match the response
    """
    logger = LoggerManager().getLogger(__name__)

    refDataService = session.getService("//blp/refdata")
    request = refDataService.createRequest('ReferenceDataRequest')

    # Force GMT time and include expired contracts
    self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)
    self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS', "Y")

    self.add_override(request, 'START_DT',
                      options.startDateTime.strftime('%Y%m%d'))
    self.add_override(request, 'END_DT',
                      options.endDateTime.strftime('%Y%m%d'))

    # Only one security/eventType per request
    for field in options.fields:
        request.getElement("fields").appendValue(field)

    for security in options.security:
        request.getElement("securities").appendValue(security)

    # Truthiness check instead of `!= {}`, and dict.get() instead of a
    # manual membership test for the override-name translation
    if options.overrides:
        for k, v in options.overrides.items():
            new_k = super().convert_override_fields.get(k, k)

            self.add_override(request, new_k, v)

    logger.info("Sending Bloomberg Ref Request:" + str(request))
    session.sendRequest(request=request, correlationId=cid)
def __init__(self):
    """Initialises backtest parameters with conservative defaults.

    Groups of settings: output/plotting flags, portfolio-level vol
    targeting, signal-level vol targeting, notional/combination options,
    position limits, take profit/stop loss and signal delay.
    """
    super(MarketDataRequest, self).__init__()

    self.logger = LoggerManager().getLogger(__name__)

    # Name attached to the generated signal (None until assigned)
    self.__signal_name = None

    # output parameters for backtest (should we add returns statistics
    # on legends, write CSVs with returns etc.)
    self.__plot_start = None
    self.__calc_stats = True
    self.__write_csv = False
    self.__write_csv_pnl = False
    self.__plot_interim = False
    self.__include_benchmark = False

    self.__tech_params = TechParams()

    # default parameters for portfolio level vol adjustment
    self.__portfolio_vol_adjust = False
    self.__portfolio_vol_period_shift = 0
    self.__portfolio_vol_rebalance_freq = None
    self.__portfolio_vol_resample_freq = None
    self.__portfolio_vol_resample_type = 'mean'
    self.__portfolio_vol_target = 0.1  # 10% vol target
    self.__portfolio_vol_max_leverage = None
    self.__portfolio_vol_periods = 20
    self.__portfolio_vol_obs_in_year = 252

    # default parameters for signal level vol adjustment
    self.__signal_vol_adjust = False
    self.__signal_vol_period_shift = 0
    self.__signal_vol_rebalance_freq = None
    self.__signal_vol_resample_freq = None
    self.__signal_vol_resample_type = 'mean'
    self.__signal_vol_target = 0.1  # 10% vol target
    self.__signal_vol_max_leverage = None
    self.__signal_vol_periods = 20
    self.__signal_vol_obs_in_year = 252

    # portfolio notional size
    self.__portfolio_notional_size = None
    self.__portfolio_combination = None
    self.__portfolio_combination_weights = None

    # parameters for maximum position limits (expressed as whole portfolio)
    self.__max_net_exposure = None
    self.__max_abs_exposure = None

    self.__position_clip_rebalance_freq = None
    self.__position_clip_resample_freq = None  # by default apply max position criterion on last business day of month
    self.__position_clip_resample_type = 'mean'
    self.__position_clip_period_shift = 0

    # take profit and stop loss parameters
    self.__take_profit = None
    self.__stop_loss = None

    # should we delay the signal?
    self.__signal_delay = 0
def force_type_conversion(self, data_frame):
    """Coerces DataFrame columns to their natural dtypes.

    Columns named in DataConstants.always_date_columns (plus the special
    Bloomberg fields ECO_RELEASE_DT / FIRST_REVISION_DATE, which arrive
    as YYYYMMDD integers) are parsed as datetimes; every other column is
    coerced to numeric on a best-effort basis.

    Parameters
    ----------
    data_frame : DataFrame or None
        Data to convert; None or an empty frame is returned unchanged

    Returns
    -------
    DataFrame or None
    """
    constants = DataConstants()

    logger = LoggerManager().getLogger(__name__)

    if data_frame is not None:
        if not (data_frame.empty):

            # Need to convert numerical and datetime columns separately
            # post pandas 0.23
            for c in data_frame.columns:
                is_date = False

                # Special case for ECO_RELEASE_DT / FIRST_REVISION_DATE
                # which are stored as YYYYMMDD ints: parse element-wise,
                # falling back to NaT for unparseable entries
                if 'ECO_RELEASE_DT' in c or 'FIRST_REVISION_DATE' in c:
                    try:
                        temp_col = []  # data_frame[c].values

                        for i in range(0, len(data_frame[c].values)):
                            try:
                                temp_col.append(
                                    pd.to_datetime(str(
                                        int(data_frame[c].values[i])),
                                        format='%Y%m%d'))
                            except:
                                # Bad/missing entry becomes NaT
                                temp_col.append(np.datetime64('NaT'))

                        data_frame[c] = temp_col
                    except Exception as e:
                        logger.warning(
                            "Couldn't convert " + str(c)
                            + " to date.. was this column empty? "
                            + str(e))

                else:
                    # Only convert those Bloomberg reference fields to
                    # dates which have been listed explicitly
                    for d in constants.always_date_columns:
                        if d in c:
                            try:
                                data_frame[c] = pd.to_datetime(
                                    data_frame[c], errors='coerce')

                                is_date = True
                                break
                            except:
                                pass

                    # Otherwise this is not a date field so attempt to
                    # convert into numbers
                    if not (is_date):
                        try:
                            # NOTE(review): errors='ignore' is deprecated
                            # in newer pandas — confirm target version
                            data_frame[c] = pd.to_numeric(data_frame[c],
                                                          errors='ignore')
                        except:
                            pass

    logger.debug("Returning converted dataframe...")

    return data_frame
def load_assets(self, br=None):
    """Loads the FX asset data used by the trading model.

    Downloads daily FX spot data from Quandl (falling back to a local CSV
    when the web request fails) and builds the FX basket definition.

    Parameters
    ----------
    br : BacktestRequest (optional)
        Backtest parameters; loaded via load_parameters when not supplied

    Returns
    -------
    (DataFrame, DataFrame, DataFrame or None, dict)
        Asset prices, spot prices for signalling, optional secondary spot
        data, and the basket dictionary
    """
    ##### FILL IN WITH YOUR ASSET DATA
    from findatapy.util.loggermanager import LoggerManager

    logger = LoggerManager().getLogger(__name__)

    # For FX basket
    full_bkt = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

    basket_dict = {}

    for i in range(0, len(full_bkt)):
        basket_dict[full_bkt[i]] = [full_bkt[i]]

    basket_dict['FX trend'] = full_bkt

    br = self.load_parameters(br=br)

    logger.info("Loading asset data...")

    vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK',
                      'FRED/DEXUSAL', 'FRED/DEXCAUS', 'FRED/DEXUSNZ',
                      'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

    market_data_request = MarketDataRequest(
        start_date=br.start_date,           # start date
        finish_date=br.finish_date,         # finish date
        freq='daily',                       # daily data
        data_source='quandl',               # use Quandl as data source
        tickers=full_bkt,                   # ticker (Thalesians)
        fields=['close'],                   # which fields to download
        vendor_tickers=vendor_tickers,      # ticker (Quandl)
        vendor_fields=['close'],            # which Bloomberg fields to download
        cache_algo='cache_algo_return')     # how to return data

    asset_df = self.market.fetch_market(market_data_request)

    # If web connection fails read from CSV
    if asset_df is None:
        import pandas

        # pandas.datetime was removed in pandas 1.0, so the old
        # date_parser lambda crashed on modern pandas; ISO dates in the
        # 'Date' column are parsed natively by parse_dates
        asset_df = pandas.read_csv("d:/fxcta.csv", index_col=0,
                                   parse_dates=['Date'])

    # Signalling variables
    spot_df = asset_df
    spot_df2 = None  # asset_df

    return asset_df, spot_df, spot_df2, basket_dict
def data_source(self, data_source):
    """Sets the data source, warning when it is not recognised.

    Parameters
    ----------
    data_source : str
        Data source name (eg. 'bloomberg', 'quandl'); unrecognised values
        are still stored but trigger a warning
    """
    # Validation is deliberately best-effort: failures here should never
    # prevent the assignment below
    try:
        valid_data_source = ['ats', 'bloomberg', 'dukascopy', 'fred',
                             'gain', 'google', 'quandl', 'yahoo']

        if data_source not in valid_data_source:
            # '+' (not '&') for string concatenation: the original '&'
            # raised TypeError, which the blanket except then silently
            # swallowed, so the warning was never emitted
            LoggerManager().getLogger(__name__).warning(
                str(data_source) + " is not a defined data source.")
    except:
        pass

    self.__data_source = data_source
def __init__(self, market_data_generator=None):
    """Stores the market data generator and sets up calculation helpers."""
    self.logger = LoggerManager().getLogger(__name__)
    self.market_data_generator = market_data_generator

    self.calculations = Calculations()

    # In-memory cache of previously fetched results
    self.cache = {}
def environment(self, environment):
    """Sets the data environment (lower-cased), warning when it is not
    one of the environments declared in data_constants."""
    environment = environment.lower()

    if environment not in data_constants.possible_data_environment:
        LoggerManager().getLogger(__name__).warning(
            environment + " is not a defined environment.")

    self.__environment = environment
def trade_side(self, trade_side):
    """Sets the trade side (lower-cased), warning when it is not one of
    'trade', 'bid' or 'ask'."""
    trade_side = trade_side.lower()

    if trade_side not in ('trade', 'bid', 'ask'):
        LoggerManager().getLogger(__name__).warning(
            trade_side + " is not a defined trade side.")

    self.__trade_side = trade_side
def environment(self, environment):
    """Sets the environment (lower-cased), warning when it is not
    'prod' or 'backtest'."""
    environment = environment.lower()

    if environment not in ('prod', 'backtest'):
        LoggerManager().getLogger(__name__).warning(
            environment + " is not a defined environment.")

    self.__environment = environment
def cache_algo(self, cache_algo):
    """Sets the caching scheme (lower-cased), warning when it is not a
    recognised scheme."""
    cache_algo = cache_algo.lower()

    recognised = {'internet_load', 'internet_load_return',
                  'cache_algo', 'cache_algo_return'}

    if cache_algo not in recognised:
        LoggerManager().getLogger(__name__).warning(
            cache_algo + " is not a defined caching scheme")

    self.__cache_algo = cache_algo
def get_reference_data(self, md_request_vendor, md_request):
    """Downloads Bloomberg reference data and maps it back to findatapy
    tickers/fields.

    Extends the request window a year into the future (reference requests
    often concern upcoming calendar events).

    Parameters
    ----------
    md_request_vendor : MarketDataRequest
        Request expressed in vendor tickers/fields (finish_date mutated)
    md_request : MarketDataRequest
        Original request used to translate vendor names back

    Returns
    -------
    DataFrame or None
        Reference data with 'ticker.field' columns, or None when nothing
        was returned
    """
    logger = LoggerManager().getLogger(__name__)

    end = datetime.utcnow()

    from datetime import timedelta

    # Because very often we may wish to download data about future
    # calendar events
    end = end + timedelta(days=365)
    # end.replace(year = end.year + 1)

    md_request_vendor.finish_date = end

    logger.debug("Requesting ref for " + md_request_vendor.tickers[0]
                 + " etc.")

    data_frame = self.download_ref(md_request_vendor)

    logger.debug("Waiting for ref...")

    # Merged the two redundant `is not None` checks from the original and
    # removed the unused local `constants = DataConstants()`
    if data_frame is None:
        return None

    if data_frame.empty:
        # TODO if empty try downloading again a year later
        return None

    # Convert from vendor to findatapy tickers/fields
    returned_fields = data_frame.columns.get_level_values(0)
    returned_tickers = data_frame.columns.get_level_values(1)

    fields = self.translate_from_vendor_field(returned_fields, md_request)
    tickers = self.translate_from_vendor_ticker(returned_tickers,
                                                md_request)

    ticker_combined = []

    for i in range(0, len(fields)):
        ticker_combined.append(tickers[i] + "." + fields[i])

    data_frame.columns = ticker_combined

    # Need to convert numerical and datetime columns separately post
    # pandas 0.23
    data_frame = self.force_type_conversion(data_frame)

    return data_frame
def freq(self, freq):
    """Sets the data frequency (lower-cased), warning when it is not a
    recognised frequency."""
    freq = freq.lower()

    recognised = {'tick', 'second', 'minute', 'intraday', 'hourly',
                  'daily', 'weekly', 'monthly', 'quarterly', 'annually'}

    if freq not in recognised:
        LoggerManager().getLogger(__name__).warning(
            freq + " is not a defined frequency")

    self.__freq = freq
def pad_time_series_columns(self, columns, data_frame):
    """Selects time series from a dataframe and if necessary creates
    empty columns

    Missing columns are appended filled with NaN (or NaT for columns
    whose names match a known date column), then the result is reordered
    to match `columns`.

    Parameters
    ----------
    columns : list of str
        columns to be included with this keyword
    data_frame : DataFrame
        data frame to be filtered

    Returns
    -------
    DataFrame
    """
    old_columns = data_frame.columns.tolist()

    common_columns = [val for val in columns if val in old_columns]
    uncommon_columns = [val for val in columns if val not in old_columns]
    uncommon_columns = [str(x) for x in uncommon_columns]

    data_frame = data_frame[common_columns]

    if len(uncommon_columns) > 0:
        logger = LoggerManager().getLogger(__name__)

        logger.info(
            "Padding missing columns...")  # " + str(uncommon_columns))

        new_data_frame = pd.DataFrame(index=data_frame.index,
                                      columns=uncommon_columns)

        data_frame = pd.concat([data_frame, new_data_frame], axis=1)

        # Force new columns to float NaNs (not objects which causes
        # problems with newer pandas versions)
        # or to NaT if they are date columns
        # NOTE(review): `constants` is not defined in this method —
        # presumably a module-level DataConstants() instance; confirm
        for u in uncommon_columns:
            is_date = False

            for c in constants.always_date_columns:
                if c in u:
                    is_date = True

            if is_date:
                data_frame[u] = np.datetime64('NaT')
            else:
                data_frame[u] = np.nan

        # SLOW method below
        # for x in uncommon_columns: data_frame.loc[:,x] = np.nan

    # Get columns in same order again
    data_frame = data_frame[columns]

    return data_frame
def trade_side(self, trade_side):
    """Sets the trade side (lower-cased); warns for anything other than
    'trade', 'bid' or 'ask'."""
    trade_side = trade_side.lower()

    recognised_sides = {"trade", "bid", "ask"}

    if trade_side not in recognised_sides:
        LoggerManager().getLogger(__name__).warning(
            trade_side + " is not a defined trade side.")

    self.__trade_side = trade_side
def kill_session(self, session):
    """Stops a Bloomberg session, logging rather than propagating failures.

    Parameters
    ----------
    session : blpapi Session or None
        Session to be stopped; None is a no-op
    """
    logger = LoggerManager().getLogger(__name__)

    if session is not None:
        try:
            session.stop()

            logger.info("Stopping session...")
        except Exception as e:
            # The original try/finally had no except clause, so a failure
            # in session.stop() propagated out of a cleanup routine
            logger.error("Error stopping session: " + str(e))
        finally:
            logger.info("Finally stopping session...")

            session = None
def __init__(self, market_data_generator=None):
    """Stores the market data generator and instantiates the calculation,
    filtering, timezone and rates helpers."""
    self.logger = LoggerManager().getLogger(__name__)
    self.market_data_generator = market_data_generator

    self.calculations = Calculations()
    self.filter = Filter()
    self.timezone = Timezone()

    self.rates = RatesFactory()
def freq(self, freq):
    """Sets the data frequency (lower-cased); warns when the value is not
    a recognised frequency."""
    freq = freq.lower()

    recognised_freqs = ("tick", "second", "minute", "intraday", "hourly",
                        "daily", "weekly", "monthly", "quarterly",
                        "annually")

    if freq not in recognised_freqs:
        LoggerManager().getLogger(__name__).warning(
            freq + " is not a defined frequency")

    self.__freq = freq
def combine_slices(self, data_frame_cols, data_frame_slice):
    """Returns the slice if its ticker (column level 1) is not already
    present in the existing columns, otherwise None.

    Parameters
    ----------
    data_frame_cols : list-like
        Tickers already collected
    data_frame_slice : DataFrame
        New slice with MultiIndex columns (field, ticker)

    Returns
    -------
    DataFrame or None
    """
    try:
        if (data_frame_slice.columns.get_level_values(1).values[0]
                not in data_frame_cols):
            # return data_frame.join(data_frame_slice, how="outer")
            return data_frame_slice
    except Exception as e:
        # logger.warn is deprecated in favour of logger.warning
        LoggerManager().getLogger(__name__).warning(
            'Data slice empty ' + str(e))

        return None

    return None
def data_source(self, data_source):
    """Sets the data source, warning when it is not recognised.

    Parameters
    ----------
    data_source : str
        Data source name (eg. 'bloomberg', 'eikon'); unrecognised values
        are still stored but trigger a warning
    """
    # Validation is deliberately best-effort: failures here should never
    # prevent the assignment below
    try:
        valid_data_source = [
            "ats", "bloomberg", "dukascopy", "fred", "gain", "google",
            "quandl", "yahoo", "boe", "eikon"
        ]

        if data_source not in valid_data_source:
            # '+' (not '&') for string concatenation: the original '&'
            # raised TypeError, which the blanket except then silently
            # swallowed, so the warning was never emitted
            LoggerManager().getLogger(__name__).warning(
                str(data_source) + " is not a defined data source.")
    except:
        pass

    self.__data_source = data_source
def run_day_of_month_analysis(self, trading_model, resample_freq='B'):
    """Runs a seasonality study on a trading model's P&L and plots it.

    Computes day-of-month seasonality (business-day resampled) and
    month-of-year seasonality of the strategy returns, then writes both
    charts to PNG/HTML under DUMP_PATH.

    Parameters
    ----------
    trading_model : TradingModel
        Model whose strategy is constructed and whose P&L is analysed
    resample_freq : str
        Resampling frequency passed to the day-of-month seasonality calc

    Returns
    -------
    DataFrame
        Month-of-year seasonality of the strategy returns
    """
    from finmarketpy.economics.seasonality import Seasonality

    logger = LoggerManager().getLogger(__name__)

    calculations = Calculations()
    seas = Seasonality()
    trading_model.construct_strategy()
    pnl = trading_model.strategy_pnl()

    # Get seasonality by day of the month
    pnl = pnl.resample('B').mean()
    rets = calculations.calculate_returns(pnl).tz_localize(None)

    bus_day = seas.bus_day_of_month_seasonality(
        rets, add_average=True, resample_freq=resample_freq)

    # Get seasonality by month
    pnl = pnl.resample('BM').mean()
    rets = calculations.calculate_returns(pnl).tz_localize(None)
    month = seas.monthly_seasonality(rets)

    logger.info("About to plot seasonality...")
    style = Style()

    # Plotting spot over day of month/month of year
    style.color = 'Blues'
    style.scale_factor = trading_model.SCALE_FACTOR
    style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.png'
    style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.html'
    style.title = trading_model.FINAL_STRATEGY + ' day of month seasonality'
    style.display_legend = False

    # Highlight the average column (last one) in red with a thicker line
    # on the secondary axis
    style.color_2_series = [bus_day.columns[-1]]
    style.color_2 = ['red']  # red, pink
    style.linewidth_2 = 4
    style.linewidth_2_series = [bus_day.columns[-1]]
    style.y_axis_2_series = [bus_day.columns[-1]]

    self.chart.plot(bus_day, chart_type='line', style=style)

    # Fresh style for the month-of-year chart
    style = Style()

    style.scale_factor = trading_model.SCALE_FACTOR
    style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.png'
    style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.html'
    style.title = trading_model.FINAL_STRATEGY + ' month of year seasonality'

    self.chart.plot(month, chart_type='line', style=style)

    return month
def get_daily_data(self, md_request, md_request_vendor):
    """Downloads daily data and maps it back to findatapy tickers/fields.

    Parameters
    ----------
    md_request : MarketDataRequest
        Original request used to translate vendor names back
    md_request_vendor : MarketDataRequest
        Request expressed in vendor tickers/fields

    Returns
    -------
    DataFrame or None
        Daily data with 'ticker.field' columns sorted by date, or None
        when nothing usable was returned
    """
    logger = LoggerManager().getLogger(__name__)

    data_frame = self.download_daily(md_request_vendor)

    if data_frame is None:
        return None

    if data_frame.empty:
        logger.info("No tickers returned for...")

        try:
            logger.info(str(md_request_vendor.tickers))
        except:
            pass

        return None

    # Convert from vendor to findatapy tickers/fields
    returned_fields = data_frame.columns.get_level_values(0)
    returned_tickers = data_frame.columns.get_level_values(1)

    # TODO if empty try downloading again a year later
    try:
        fields = self.translate_from_vendor_field(
            returned_fields, md_request)
    except Exception as e:
        # The original only print()-ed here and fell through, leaving
        # `fields` undefined and crashing with NameError just below
        logger.error("Problem translating vendor field: " + str(e))

        return None

    tickers = self.translate_from_vendor_ticker(
        returned_tickers, md_request)

    ticker_combined = []

    for i in range(0, len(fields)):
        ticker_combined.append(tickers[i] + "." + fields[i])

    # Convert numerical columns to floats and dates to dates (avoids
    # having object columns which can cause issues with later Pandas)
    data_frame = self.force_type_conversion(data_frame)

    data_frame.columns = ticker_combined
    data_frame.index.name = 'Date'

    # Force sorting of index
    try:
        data_frame = data_frame.sort_index()
    except:
        pass

    return data_frame