def get_economic_event_ret_over_custom_event_day(self, data_frame_in, event_dates, name, event, start, end,
                                                 lagged=False, NYC_cutoff=10):
    filter = Filter()
    event_dates = filter.filter_time_series_by_date(start, end, event_dates)

    data_frame = data_frame_in.copy(deep=True)  # because we change the dates!

    timezone = Timezone()
    calendar = Calendar()

    bday = CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri')

    event_dates_nyc = timezone.convert_index_from_UTC_to_new_york_time(event_dates)
    average_hour_nyc = numpy.average(event_dates_nyc.index.hour)

    event_dates = calendar.floor_date(event_dates)

    # realised is traditionally on the later day eg. 3rd Jan realised ON is 2nd-3rd Jan realised
    # so if the Fed meeting is later on 2nd Jan, we need realised labelled on the 3rd (so minus a day)
    # implied expires on the next day eg. 3rd Jan implied ON is 3rd-4th Jan implied

    # TODO smarter way of adjusting dates, as sometimes events can be before/after 10am NY cut
    if lagged and average_hour_nyc >= NYC_cutoff:
        data_frame.index = data_frame.index - bday
    elif not lagged and average_hour_nyc < NYC_cutoff:  # ie. implied
        data_frame.index = data_frame.index + bday

    # set as New York time and select only those ON vols at the 10am NY cut just before the event
    data_frame_events = data_frame.loc[event_dates.index]
    data_frame_events.columns = data_frame.columns.values + '-' + name + ' ' + event

    return data_frame_events
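# A hedged usage sketch, assuming an EventsFactory (defined further below) with economic
# events loaded, and implied_vol_df holding eg. EURUSDVON.close quotes; the event name
# follows the NFP example in the EventsFactory docstring, dates are illustrative
ef = EventsFactory()

implied_on_nfp = ef.get_economic_event_ret_over_custom_event_day(
    implied_vol_df, 'USD', 'US Employees on Nonfarm Payrolls Total MoM Net Change SA',
    '01 Jan 2005', '01 Jan 2015',
    lagged=False)   # False => label with the implied (pre-event) convention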
def bus_day_of_month_seasonality(self, data_frame,
                                 month_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], cum=True,
                                 cal="FX", partition_by_month=True, add_average=False, price_index=False):

    calculations = Calculations()
    filter = Filter()

    if price_index:
        data_frame = data_frame.resample('B').last()  # resample into business days
        data_frame = calculations.calculate_returns(data_frame)

    data_frame.index = pandas.to_datetime(data_frame.index)
    data_frame = filter.filter_time_series_by_holidays(data_frame, cal)

    monthly_seasonality = calculations.average_by_month_day_by_bus_day(data_frame, cal)
    monthly_seasonality = monthly_seasonality.loc[month_list]

    if partition_by_month:
        monthly_seasonality = monthly_seasonality.unstack(level=0)

        if add_average:
            monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

    if cum is True:
        if partition_by_month:
            monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))
            # monthly_seasonality.index = monthly_seasonality.index + 1  # shifting index
            monthly_seasonality = monthly_seasonality.sort_index()

        monthly_seasonality = calculations.create_mult_index(monthly_seasonality)

    return monthly_seasonality
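# A hedged usage sketch: cumulative business-day-of-month seasonality of EURUSD,
# assuming this method lives on finmarketpy's Seasonality class and spot_df holds
# daily EURUSD prices (eg. fetched via Market.fetch_market)
from finmarketpy.economics import Seasonality

seasonality = Seasonality()

bus_day = seasonality.bus_day_of_month_seasonality(
    spot_df, cum=True, cal="FX", partition_by_month=True,
    add_average=True, price_index=True)   # price_index=True => input is prices, not returns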
def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
    """Compares the trading strategy we are backtesting against a benchmark

    Parameters
    ----------
    br : BacktestRequest
        Parameters for backtest such as start and finish dates
    strategy_df : pandas.DataFrame
        Strategy time series
    benchmark_df : pandas.DataFrame
        Benchmark time series
    """

    include_benchmark = False
    calc_stats = False

    if hasattr(br, 'include_benchmark'):
        include_benchmark = br.include_benchmark

    if hasattr(br, 'calc_stats'):
        calc_stats = br.calc_stats

    if include_benchmark:
        ret_stats = RetStats()
        risk_engine = RiskEngine()
        filter = Filter()
        calculations = Calculations()

        # align strategy time series with that of benchmark
        strategy_df, benchmark_df = strategy_df.align(benchmark_df, join='left', axis=0)

        # if necessary apply vol target to benchmark (to make it comparable with strategy)
        if hasattr(br, 'portfolio_vol_adjust'):
            if br.portfolio_vol_adjust is True:
                benchmark_df = risk_engine.calculate_vol_adjusted_index_from_prices(benchmark_df, br=br)

        # only add return statistics to the column names if this has been specified
        # (note: when data is of a different frequency, stats might underrepresent vol)
        benchmark_df = benchmark_df.ffill()
        ret_stats.calculate_ret_stats_from_prices(benchmark_df, br.ann_factor)

        if calc_stats:
            benchmark_df.columns = ret_stats.summary()

        # realign strategy & benchmark
        strategy_benchmark_df = strategy_df.join(benchmark_df, how='inner')
        strategy_benchmark_df = strategy_benchmark_df.ffill()

        strategy_benchmark_df = filter.filter_time_series_by_date(br.plot_start, br.finish_date,
                                                                  strategy_benchmark_df)
        strategy_benchmark_df = calculations.create_mult_index_from_prices(strategy_benchmark_df)

        self._benchmark_pnl = benchmark_df
        self._benchmark_ret_stats = ret_stats

        return strategy_benchmark_df

    return strategy_df
asset = 'EURUSD'

# Download all the market data needed for pricing options (vol surface)
md_request = MarketDataRequest(start_date=month_before, finish_date=today,
                               data_source='bloomberg', freq='intraday', fields='open',
                               tickers=[asset + 'VON'],
                               vendor_tickers=[asset + 'VON BGN Curncy'],
                               cache_algo='cache_algo_return')

from findatapy.timeseries import Calculations, Filter

calc = Calculations()
filter = Filter()

freq_min_mult = 5

# Resample into 1 minute data and fill down all points
implied_vol_df = market.fetch_market(md_request)[
    asset + 'VON.open'].resample('1min').first().ffill()

# Filter data by 1000 New York time, convert back to UTC and remove any points outside trading hours
# Then strip the time of day from the timestamp
implied_vol_df = filter.filter_time_series_by_time_of_day_timezone(
    10, 0, implied_vol_df, timezone_of_snap='America/New_York')
implied_vol_df = filter.remove_out_FX_out_of_hours(implied_vol_df)

implied_vol_df.index = pd.to_datetime(implied_vol_df.index.date)
implied_vol_df = pd.DataFrame(implied_vol_df)
implied_vol_df.columns = [asset + 'VON.close']
class MarketDataGenerator(object):
    """Returns market data time series by directly calling market data sources.

    At present it supports Bloomberg (bloomberg), Yahoo (yahoo), Quandl (quandl), FRED (fred) etc. which are
    implemented in subclasses of the DataVendor class. This provides a common wrapper for all these data sources.
    """

    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1
        self.days_expired_intraday_contract_download = -1

        return

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_data_vendor(self, source):
        """Loads the appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.
            we can also have forms like "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """
        data_vendor = None

        try:
            source = source.split("-")[0]
        except:
            self.logger.error("Was data source specified?")

            return None

        if source == 'bloomberg':
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()
        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()
        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()
        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()
        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()
        elif source == 'fxcm':
            from findatapy.market.datavendorweb import DataVendorFXCM
            data_vendor = DataVendorFXCM()
        elif source == 'alfred':
            from findatapy.market.datavendorweb import DataVendorALFRED
            data_vendor = DataVendorALFRED()
        elif source in ['yahoo', 'google', 'fred', 'oecd', 'eurostat', 'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if market_data_request.vendor_tickers is not None and market_data_request.tickers is None:
            market_data_request.tickers = market_data_request.vendor_tickers

        tickers = market_data_request.tickers

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            market_data_request.tickers = ConfigManager().get_instance().get_tickers_list_for_category(
                market_data_request.category, market_data_request.data_source,
                market_data_request.freq, market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if market_data_request.freq in ['intraday', 'tick', 'second', 'hour', 'minute']:
            data_frame_agg = self.download_intraday_tick(market_data_request)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request)

        if 'internet_load' in market_data_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if market_data_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")

            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            # (also the same for downloading futures contracts dates)
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            # pad columns a second time (is this necessary to do here again?)
            # TODO only do this for non-daily data?
            try:
                return self.filter.filter_time_series(market_data_request, data_frame_agg, pad_columns=True)
            except:
                if data_frame_agg is not None:
                    return data_frame_agg

                import traceback

                self.logger.warn("No data returned for " + str(market_data_request.tickers))

                return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        if isinstance(ticker, list):
            ticker = ticker[0]

        return self.create_cache_file_name(
            MarketDataRequest().create_category_key(market_data_request, ticker))

    def download_intraday_tick(self, market_data_request):
        """Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(market_data_request.tickers) == 1 or DataConstants().market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = self.fetch_single_time_series(market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if data_frame_single.empty == False:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

            # if you call for returning multiple tickers, be careful with memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calculations.pandas_outer_join(data_frame_group)

            return data_frame_agg
        else:
            market_data_request_list = []

            # create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):

        market_data_request = MarketDataRequest(md_request=market_data_request)

        # only include those tickers which have not expired yet!
        start_date = pandas.Timestamp(market_data_request.start_date).date()

        import datetime
        from datetime import timedelta

        current_date = datetime.datetime.utcnow().date()

        tickers = market_data_request.tickers
        vendor_tickers = market_data_request.vendor_tickers

        expiry_date = market_data_request.expiry_date

        config = ConfigManager().get_instance()

        # in many cases no expiry is defined so skip them
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(market_data_request.data_source, tickers[i])
            except:
                pass

            if expiry_date is not None:
                # use pandas Timestamp, a bit more robust with weird dates (can fail if comparing date vs datetime)
                expiry_date = pandas.Timestamp(expiry_date).date()

                # if the expiry is before the start date of our download don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # special case for futures-contracts which are intraday
                # avoid downloading if the expiry date is very far in the past
                # (we need this because there might be odd situations where we run on an expiry date, but still
                # want to get data right till expiry time)
                if market_data_request.category == 'futures-contracts' and market_data_request.freq == 'intraday' \
                        and self.days_expired_intraday_contract_download > 0:

                    if expiry_date + timedelta(days=self.days_expired_intraday_contract_download) < current_date:
                        tickers[i] = None

                if vendor_tickers is not None and tickers[i] is None:
                    vendor_tickers[i] = None

        market_data_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            market_data_request.vendor_tickers = [e for e in vendor_tickers if e is not None]

        data_frame_single = None

        if len(market_data_request.tickers) > 0:
            data_frame_single = self.get_data_vendor(
                market_data_request.data_source).load_ticker(market_data_request)

        if data_frame_single is not None:
            if data_frame_single.empty == False:
                data_frame_single.index.name = 'Date'

                # will fail for DataFrames which include dates/strings (eg. futures contract names)
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except:
                    self.logger.warning('Could not convert to float')

                if market_data_request.freq == "second":
                    data_frame_single = data_frame_single.resample("1s").last()

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):

        data_frame_agg = None

        thread_no = DataConstants.market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants().market_thread_no:
            thread_no = DataConstants.market_thread_no[market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = SwimPool().create_pool(thread_technique=DataConstants.market_thread_technique,
                                          thread_no=thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series, market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            # for debugging, the data_frame_group list can be pickled to disk here

            if data_frame_group is not None:
                try:
                    data_frame_agg = self.calculations.pandas_outer_join(data_frame_group)
                except Exception as e:
                    self.logger.warning('Possible overlap of columns? Have you specified the same '
                                        'ticker several times: ' + str(e))

        return data_frame_agg

    def download_daily(self, market_data_request):
        """Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        key = MarketDataRequest().create_category_key(market_data_request)

        is_key_overriden = False

        for k in DataConstants().override_multi_threading_for_categories:
            if k in key:
                is_key_overriden = True
                break

        # by default use other
        thread_no = DataConstants().market_thread_no['other']

        if market_data_request.data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[market_data_request.data_source]

        # daily data does not include ticker in the key, as multiple tickers are in the same file
        if thread_no == 1:
            data_frame_agg = self.fetch_single_time_series(market_data_request)
        else:
            market_data_request_list = []

            # guard against a group size of zero or less, which would break the loop below
            group_size = max(int(len(market_data_request.tickers) / thread_no - 1), 0)

            if group_size == 0:
                group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            # special case where we make smaller calls one after the other
            if is_key_overriden:
                data_frame_list = []

                for md in market_data_request_list:
                    data_frame_list.append(self.fetch_single_time_series(md))

                data_frame_agg = self.calculations.pandas_outer_join(data_frame_list)
            else:
                data_frame_agg = self.fetch_group_time_series(market_data_request_list)

        return data_frame_agg

    def refine_expiry_date(self, market_data_request):

        # expiry date
        if market_data_request.expiry_date is None:
            market_data_request.expiry_date = ConfigManager().get_instance().get_expiry_for_ticker(
                market_data_request.data_source, market_data_request.ticker)

        return market_data_request

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
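# Aside: a standalone sketch of the ticker chunking used by download_daily above
# (an illustrative helper, not part of the library); note it can yield more chunks
# than threads, in which case the pool simply queues the extras
def _chunk_tickers(tickers, thread_no):
    group_size = max(int(len(tickers) / thread_no - 1), 0)

    if group_size == 0:
        group_size = 1

    return [tickers[i:i + group_size] for i in range(0, len(tickers), group_size)]

# eg. 10 tickers over 4 threads => 10 chunks of 1 ticker each
print(_chunk_tickers(['T' + str(i) for i in range(10)], 4))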
def get_fx_cross_tick(self, start, end, cross, cut="NYC", data_source="dukascopy",
                      cache_algo='internet_load_return', type='spot',
                      environment='backtest', fields=['bid', 'ask']):

    if isinstance(cross, str):
        cross = [cross]

    market_data_request = MarketDataRequest(
        gran_freq="tick",
        freq_mult=1,
        freq='tick',
        cut=cut,
        fields=['bid', 'ask', 'bidv', 'askv'],
        cache_algo=cache_algo,
        environment=environment,
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='fx')

    market_data_generator = self.market_data_generator
    data_frame_agg = None

    for cr in cross:
        if type == 'spot':
            market_data_request.tickers = cr

            cross_vals = market_data_generator.fetch_market_data(market_data_request)

            if cross_vals is not None:
                # If user only wants 'close' calculate that as the mid of the bid/ask fields
                if fields == ['close']:
                    cross_vals = pandas.DataFrame(
                        cross_vals[[cr + '.bid', cr + '.ask']].mean(axis=1))
                    cross_vals.columns = [cr + '.close']
                else:
                    filter = Filter()

                    filter_columns = [cr + '.' + f for f in fields]
                    cross_vals = filter.filter_time_series_by_columns(filter_columns, cross_vals)

        if data_frame_agg is None:
            data_frame_agg = cross_vals
        else:
            data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

    if data_frame_agg is not None:
        # Strip the NaN elements
        data_frame_agg = data_frame_agg.dropna()

    return data_frame_agg
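# A hedged usage sketch, assuming this method sits on findatapy's FXCrossFactory
# (dates are illustrative); fields=['close'] returns the bid/ask mid
fx_factory = FXCrossFactory(market_data_generator=MarketDataGenerator())

tick_mid = fx_factory.get_fx_cross_tick(
    '14 Jun 2016', '15 Jun 2016', 'EURUSD',
    data_source='dukascopy', fields=['close'])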
def _get_individual_fx_cross(self, market_data_request):
    cr = market_data_request.cross
    type = market_data_request.type
    freq = market_data_request.freq

    base = cr[0:3]
    terms = cr[3:6]

    if type == 'spot':
        # Non-USD crosses
        if base != 'USD' and terms != 'USD':
            base_USD = self.fxconv.correct_notation('USD' + base)
            terms_USD = self.fxconv.correct_notation('USD' + terms)

            # TODO check if the cross exists in the database

            # Download base USD cross
            market_data_request.tickers = base_USD
            market_data_request.category = 'fx'

            base_vals = self.market_data_generator.fetch_market_data(market_data_request)

            # Download terms USD cross
            market_data_request.tickers = terms_USD
            market_data_request.category = 'fx'

            terms_vals = self.market_data_generator.fetch_market_data(market_data_request)

            # If quoted USD/base flip to get base/USD
            if base_USD[0:3] == 'USD':
                base_vals = 1 / base_vals

            # If quoted USD/terms flip to get terms/USD
            if terms_USD[0:3] == 'USD':
                terms_vals = 1 / terms_vals

            base_vals.columns = ['temp']
            terms_vals.columns = ['temp']

            cross_vals = base_vals.div(terms_vals, axis='index')
            cross_vals.columns = [cr + '.close']

            base_vals.columns = [base_USD + '.close']
            terms_vals.columns = [terms_USD + '.close']
        else:
            # if base == 'USD': non_USD = terms
            # if terms == 'USD': non_USD = base

            correct_cr = self.fxconv.correct_notation(cr)

            market_data_request.tickers = correct_cr
            market_data_request.category = 'fx'

            cross_vals = self.market_data_generator.fetch_market_data(market_data_request)

            # Special case for USDUSD!
            if base + terms == 'USDUSD':
                if freq == 'daily':
                    cross_vals = pandas.DataFrame(1, index=cross_vals.index, columns=cross_vals.columns)

                    filter = Filter()
                    cross_vals = filter.filter_time_series_by_holidays(cross_vals, cal='WEEKDAY')
            else:
                # Flip if not quoted in market convention (eg. JPYUSD)
                if correct_cr != cr:
                    cross_vals = 1 / cross_vals

                cross_vals.columns = [cr + '.close']

    elif type[0:3] == "tot":
        if freq == 'daily':
            # Download base USD cross
            market_data_request.tickers = base + 'USD'
            market_data_request.category = 'fx-tot'

            if type == "tot":
                base_vals = self.market_data_generator.fetch_market_data(market_data_request)

            # Download terms USD cross
            market_data_request.tickers = terms + 'USD'
            market_data_request.category = 'fx-tot'

            if type == "tot":
                terms_vals = self.market_data_generator.fetch_market_data(market_data_request)

            # Special case for USDUSD (and when base or terms are USD)
            if base + terms == 'USDUSD':
                base_rets = self.calculations.calculate_returns(base_vals)
                cross_rets = pandas.DataFrame(0, index=base_rets.index, columns=base_rets.columns)
            elif base + 'USD' == 'USDUSD':
                cross_rets = -self.calculations.calculate_returns(terms_vals)
            elif terms + 'USD' == 'USDUSD':
                cross_rets = self.calculations.calculate_returns(base_vals)
            else:
                base_rets = self.calculations.calculate_returns(base_vals)
                terms_rets = self.calculations.calculate_returns(terms_vals)

                cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

            # The first return of a time series will be NaN, given we don't know the previous point
            cross_rets.iloc[0] = 0

            cross_vals = self.calculations.create_mult_index(cross_rets)
            cross_vals.columns = [cr + '-tot.close']

        elif freq == 'intraday':
            self.logger.info('Total return calculation for intraday not implemented yet')

            return None

    return cross_vals
class VolStats(object):
    """Arranges the underlying volatility market in an easier to read format. Also provides methods for calculating
    various volatility metrics, such as realized volatility and the volatility risk premium. Has extensive support
    for estimating implied volatility add-ons.
    """

    def __init__(self, market_df=None, intraday_spot_df=None):
        self._market_df = market_df
        self._intraday_spot_df = intraday_spot_df

        self._calculations = Calculations()
        self._timezone = Timezone()
        self._filter = Filter()

    def calculate_realized_vol(self, asset, spot_df=None, returns_df=None, tenor_label="ON", freq='daily',
                               freq_min_mult=1, hour_of_day=10, minute_of_day=0, field='close',
                               returns_calc='simple', timezone_hour_minute='America/New_York'):
        """Calculates rolling realized vol with daily cutoffs, either using daily spot data or intraday spot data
        (which is assumed to be in the UTC timezone)

        Parameters
        ----------
        asset : str
            asset to be calculated
        spot_df : pd.DataFrame
            minute spot data (freq_min_mult should be the same as the frequency and should have the timezone set)
        tenor_label : str
            tenor to calculate
        freq_min_mult : int
            frequency multiplier for the data (1 = 1 min)
        hour_of_day : int
            closing hour of the data in the timezone specified eg. 10 for a 1000 cut (default = 10)
        minute_of_day : int
            closing minute of the data in the timezone specified (default = 0)
        field : str
            By default 'close'
        returns_calc : str
            'simple' calculate simple returns
            'log' calculate log returns
        timezone_hour_minute : str
            The timezone for the closing hour/minute (default: 'America/New_York')

        Returns
        -------
        pd.DataFrame of realized volatility
        """

        if returns_df is None:
            if spot_df is None:
                if freq == 'daily':
                    spot_df = self._market_df[asset + "." + field]
                else:
                    spot_df = self._intraday_spot_df[asset + "." + field]

            if returns_calc == 'simple':
                returns_df = self._calculations.calculate_returns(spot_df)
            else:
                returns_df = self._calculations.calculate_log_returns(spot_df)

        cal = Calendar()

        tenor_days = cal.get_business_days_tenor(tenor_label)

        if freq == 'intraday':
            # Annualization factor (1440 is the number of minutes in a day)
            mult = int(1440.0 / float(freq_min_mult))

            realized_rolling = self._calculations.rolling_volatility(
                returns_df, tenor_days * mult, obs_in_year=252 * mult)

            # Convert to NYC time (or whichever timezone has been specified)
            realized_rolling = self._timezone.convert_index_aware_to_alt(realized_rolling, timezone_hour_minute)

            realized_vol = self._filter.filter_time_series_by_time_of_day(
                hour_of_day, minute_of_day, realized_rolling)
            realized_vol = self._timezone.convert_index_aware_to_UTC_time(realized_vol)
            realized_vol = self._timezone.set_as_no_timezone(realized_vol)
        elif freq == 'daily':
            realized_vol = self._calculations.rolling_volatility(returns_df, tenor_days, obs_in_year=252)

            # Strip the time off the date
            realized_vol.index = realized_vol.index.date

        realized_vol = pd.DataFrame(realized_vol)
        realized_vol.columns = [asset + 'H' + tenor_label + '.close']

        return realized_vol

    def calculate_vol_risk_premium(self, asset, tenor_label="ON", implied_vol=None, realized_vol=None,
                                   field='close', adj_ON_friday=False):
        """Calculates the volatility risk premium given implied and realized quotes (ie.
        implied - realized) and tenor

        Calculates both a version which is aligned (VRP), where the implied and realized volatilities cover the
        same period (note: you will have a gap for recent points, where you can't grab future realized
        volatilities), and an unaligned version (VRPV), which is the typical one used in the market

        Parameters
        ----------
        asset : str
            asset to calculate value for
        tenor_label : str
            tenor to calculate
        implied_vol : pd.DataFrame
            implied vol quotes where columns are of the form eg. EURUSDV1M.close
        realized_vol : pd.DataFrame
            realized vol eg. EURUSDH1M.close
        field : str
            the field of the data to use (default: 'close')

        Returns
        -------
        pd.DataFrame of VRP (both lagged - VRPV & contemporaneous - VRP)
        """
        cal = Calendar()

        tenor_days = cal.get_business_days_tenor(tenor_label)

        if tenor_label == 'ON' and adj_ON_friday:
            implied_vol = self.adjust_implied_ON_fri_vol(implied_vol)

        # Add x business days to implied_vol to make it cover the same window as realized_vol (better than
        # "shift"); an approximation for options which are not ON or 1W
        # bday = CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri')

        implied_vol = implied_vol.copy(deep=True)
        implied_unaligned = implied_vol.copy(deep=True)

        cols_to_change = implied_vol.columns.values

        new_cols = []

        for i in range(0, len(cols_to_change)):
            temp_col = list(cols_to_change[i])
            temp_col[6] = 'U'

            new_cols.append(''.join(temp_col))

        implied_vol.columns = new_cols

        ## Construct the volatility risk premium such that implied covers the same period as realized
        # Add by number of days (note: for overnight/1 week tenors in FX we can add business days like this,
        # because they are always +1 business day/+5 business days, excluding national holidays and weekends);
        # for longer dates like 1 month this is an approximation
        implied_vol.index = [pd.Timestamp(x) + pd.tseries.offsets.BDay(tenor_days)
                             for x in implied_vol.index]

        vrp = implied_vol.join(realized_vol, how='outer')

        vrp[asset + "VRP" + tenor_label + ".close"] = \
            vrp[asset + "U" + tenor_label + "." + field] - vrp[asset + "H" + tenor_label + "." + field]

        ## Construct the "traditional" volatility risk premium,
        # where implied does not cover the same period as realized volatility
        vrp = vrp.join(implied_unaligned, how='outer')

        vrp[asset + "VRPV" + tenor_label + ".close"] = \
            vrp[asset + "V" + tenor_label + "." + field] - vrp[asset + "H" + tenor_label + "." + field]

        return vrp

    def calculate_implied_vol_addon(self, asset, implied_vol=None, tenor_label='ON', model_window=20,
                                    model_algo='weighted-median-model', field='close', adj_ON_friday=True):
        """Calculates the implied volatility add-on for specific tenors. The implied volatility add-on can be seen
        as a proxy for the event weight of large scheduled events covered by that day's option, such as the US
        employment report. If there are multiple large events in the same period covered by the option, this
        approach is not going to be able to disentangle them.

        Parameters
        ----------
        asset : str
            Asset to be traded (eg. EURUSD)
        tenor_label : str
            eg. ON

        Returns
        -------
        pd.DataFrame of implied volatility add-ons
        """

        part = 'V'

        if implied_vol is None:
            implied_vol = self._market_df[asset + "V" + tenor_label + "." + field]

        implied_vol = implied_vol.copy(deep=True)
        implied_vol = pd.DataFrame(implied_vol)

        # So we eliminate the impact of holidays on add-ons
        if tenor_label == 'ON' and adj_ON_friday:
            implied_vol = self.adjust_implied_ON_fri_vol(implied_vol)

        implied_vol = implied_vol.dropna()  # otherwise the moving averages get corrupted

        # Create a simple estimate for recent implied vol (note: all three terms currently use the same
        # model_window, so the average collapses to a single rolling median/mean)
        if model_algo == 'weighted-median-model':
            vol_data_20D_avg = self._calculations.rolling_median(implied_vol, model_window)
            vol_data_10D_avg = self._calculations.rolling_median(implied_vol, model_window)
            vol_data_5D_avg = self._calculations.rolling_median(implied_vol, model_window)

            vol_data_avg = (vol_data_20D_avg + vol_data_10D_avg + vol_data_5D_avg) / 3

            vol_data_addon = implied_vol - vol_data_avg
        elif model_algo == 'weighted-mean-model':
            vol_data_20D_avg = self._calculations.rolling_average(implied_vol, model_window)
            vol_data_10D_avg = self._calculations.rolling_average(implied_vol, model_window)
            vol_data_5D_avg = self._calculations.rolling_average(implied_vol, model_window)

            vol_data_avg = (vol_data_20D_avg + vol_data_10D_avg + vol_data_5D_avg) / 3

            vol_data_addon = implied_vol - vol_data_avg

        # TODO add other implied vol add-on models

        vol_data_addon = pd.DataFrame(vol_data_addon)
        implied_vol = pd.DataFrame(implied_vol)

        new_cols = implied_vol.columns.values
        new_cols = [w.replace(part + tenor_label, 'ADD' + tenor_label) for w in new_cols]

        vol_data_addon.columns = new_cols

        return vol_data_addon

    def adjust_implied_ON_fri_vol(self, data_frame):
        cols_ON = [x for x in data_frame.columns if 'VON' in x]

        # A Friday ON option covers 3 calendar days (the weekend), so scale the quoted vol
        # by sqrt(3) to make it comparable with other weekdays
        for c in cols_ON:
            data_frame.loc[data_frame.index.dayofweek == 4, c] = \
                data_frame.loc[data_frame.index.dayofweek == 4, c] * math.sqrt(3)

        return data_frame
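# A hedged end-to-end sketch of VolStats, assuming market_df holds daily EURUSD spot
# and ON implied vol columns (EURUSD.close, EURUSDVON.close)
vol_stats = VolStats(market_df=market_df)

realized = vol_stats.calculate_realized_vol('EURUSD', tenor_label='ON', freq='daily')

vrp = vol_stats.calculate_vol_risk_premium(
    'EURUSD', tenor_label='ON',
    implied_vol=market_df[['EURUSDVON.close']], realized_vol=realized,
    adj_ON_friday=True)

addon = vol_stats.calculate_implied_vol_addon('EURUSD', tenor_label='ON')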
class Market(object):
    """Higher level class which fetches market data using underlying classes such as MarketDataGenerator. Also
    contains several other classes, which are for asset specific instances, for example for generating FX spot time
    series or FX volatility surfaces.
    """

    def __init__(self, market_data_generator=None, md_request=None):
        if market_data_generator is None:
            if constants.default_market_data_generator == "marketdatagenerator":
                from findatapy.market import MarketDataGenerator
                market_data_generator = MarketDataGenerator()
            elif constants.default_market_data_generator == 'cachedmarketdatagenerator':
                # NOT CURRENTLY IMPLEMENTED FOR FUTURE USE
                from finaddpy.market import CachedMarketDataGenerator
                market_data_generator = CachedMarketDataGenerator()

        self.speed_cache = SpeedCache()

        self.market_data_generator = market_data_generator
        self.filter = Filter()
        self.md_request = md_request

    def fetch_market(self, md_request=None):
        """Fetches market data for specific tickers

        The user does not need to know how the low level API for each data provider works. The MarketDataRequest
        needs to supply parameters that define each data request. It has details which include:
            ticker eg. EURUSD
            field eg. close
            category eg. fx
            data_source eg. bloomberg
            start_date eg. 01 Jan 2015
            finish_date eg. 01 Jan 2017

        It can also have many optional attributes, such as
            vendor_ticker eg. EURUSD Curncy
            vendor_field eg. PX_LAST

        Parameters
        ----------
        md_request : MarketDataRequest
            Describes what market data to fetch

        Returns
        -------
        pandas.DataFrame
            Contains the requested market data
        """
        if self.md_request is not None:
            md_request = self.md_request

        key = md_request.generate_key()

        data_frame = None

        # If internet_load has been specified don't bother going to the cache (we might still end up calling the
        # lower level cache through MarketDataGenerator though)
        if 'cache_algo' in md_request.cache_algo:
            data_frame = self.speed_cache.get_dataframe(key)

        if data_frame is not None:
            return data_frame

        # Special cases when a predefined category has been asked
        if md_request.category is not None:

            if md_request.category == 'fx-spot-volume' and md_request.data_source == 'quandl':
                # NOT CURRENTLY IMPLEMENTED FOR FUTURE USE
                from findatapy.market.fxclsvolume import FXCLSVolume
                fxcls = FXCLSVolume(market_data_generator=self.market_data_generator)

                data_frame = fxcls.get_fx_volume(md_request.start_date, md_request.finish_date,
                                                 md_request.tickers, cut="LOC", data_source="quandl",
                                                 cache_algo=md_request.cache_algo)

            # For FX we have special methods for returning cross rates or total returns
            if (md_request.category == 'fx' or md_request.category == 'fx-tot') \
                    and md_request.tickers is not None:
                fxcf = FXCrossFactory(market_data_generator=self.market_data_generator)

                if md_request.category == 'fx':
                    type = 'spot'
                elif md_request.category == 'fx-tot':
                    type = 'tot'

                if (md_request.freq != 'tick' and md_request.fields == ['close']) or \
                        (md_request.freq == 'tick' and md_request.data_source in ['dukascopy', 'fxcm']):
                    data_frame = fxcf.get_fx_cross(md_request.start_date, md_request.finish_date,
                                                   md_request.tickers,
                                                   cut=md_request.cut, data_source=md_request.data_source,
                                                   freq=md_request.freq, cache_algo=md_request.cache_algo,
                                                   type=type, environment=md_request.environment,
                                                   fields=md_request.fields)

            # For FX implied volatility we can return the full surface
            if md_request.category == 'fx-implied-vol':
                if md_request.tickers is not None and md_request.freq == 'daily':
                    df = []

                    fxvf = FXVolFactory(market_data_generator=self.market_data_generator)

                    for t in md_request.tickers:
                        if len(t) == 6:
                            df.append(fxvf.get_fx_implied_vol(md_request.start_date, md_request.finish_date, t,
                                                              fxvf.tenor,
                                                              cut=md_request.cut,
                                                              data_source=md_request.data_source,
                                                              part=fxvf.part,
                                                              cache_algo=md_request.cache_algo))

                    if df != []:
                        data_frame = Calculations().pandas_outer_join(df)

            # For the FX vol market return all the market data necessary for pricing options,
            # which includes FX spot, volatility surface, forward points and deposit rates
            if md_request.category == 'fx-vol-market':
                if md_request.tickers is not None:
                    df = []

                    fxcf = FXCrossFactory(market_data_generator=self.market_data_generator)
                    fxvf = FXVolFactory(market_data_generator=self.market_data_generator)
                    rates = RatesFactory(market_data_generator=self.market_data_generator)

                    # For each FX cross fetch the spot, vol and forward points
                    for t in md_request.tickers:
                        if len(t) == 6:
                            df.append(fxcf.get_fx_cross(start=md_request.start_date,
                                                        end=md_request.finish_date,
                                                        cross=t,
                                                        cut=md_request.cut,
                                                        data_source=md_request.data_source,
                                                        freq=md_request.freq,
                                                        cache_algo=md_request.cache_algo,
                                                        type='spot',
                                                        environment=md_request.environment,
                                                        fields=['close']))

                            df.append(fxvf.get_fx_implied_vol(md_request.start_date, md_request.finish_date, t,
                                                              fxvf.tenor,
                                                              cut=md_request.cut,
                                                              data_source=md_request.data_source,
                                                              part=fxvf.part,
                                                              cache_algo=md_request.cache_algo))

                            df.append(rates.get_fx_forward_points(md_request.start_date, md_request.finish_date,
                                                                  t, fxvf.tenor,
                                                                  cut=md_request.cut,
                                                                  data_source=md_request.data_source,
                                                                  cache_algo=md_request.cache_algo))

                    # Lastly fetch the base depos
                    df.append(rates.get_base_depos(md_request.start_date, md_request.finish_date,
                                                   ["USD", "EUR", "CHF", "GBP"], fxvf.tenor,
                                                   cut=md_request.cut, data_source=md_request.data_source,
                                                   cache_algo=md_request.cache_algo))

                    if df != []:
                        data_frame = Calculations().pandas_outer_join(df)

            if md_request.abstract_curve is not None:
                data_frame = md_request.abstract_curve.fetch_continuous_time_series(
                    md_request, self.market_data_generator)

            if md_request.category == 'crypto':
                # Add more features later
                data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO add more special examples here for different asset classes
            # the idea is that we do all the market data downloading here, rather than elsewhere

        # By default: pass the market data request to MarketDataGenerator
        if data_frame is None:
            data_frame = self.market_data_generator.fetch_market_data(md_request)

        # Special case where we can sometimes have duplicated data times
        if md_request.freq == 'intraday' and md_request.cut == 'BSTP':
            data_frame = self.filter.remove_duplicate_indices(data_frame)

        # Push into cache
        if md_request.push_to_cache:
            self.speed_cache.put_dataframe(key, data_frame)

        return data_frame
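# A minimal usage sketch of Market (dates/tickers illustrative)
from findatapy.market import Market, MarketDataGenerator, MarketDataRequest

market = Market(market_data_generator=MarketDataGenerator())

md_request = MarketDataRequest(start_date='01 Jan 2015', finish_date='01 Jan 2017',
                               category='fx', data_source='bloomberg',
                               tickers=['EURUSD'], fields=['close'],
                               cache_algo='internet_load_return')

df = market.fetch_market(md_request)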
def get_intraday_moves_over_custom_event(self, data_frame_rets, ef_time_frame, vol=False,
                                         minute_start=5, mins=3 * 60, min_offset=0, create_index=False,
                                         resample=False, freq='minutes', cumsum=True,
                                         adj_cumsum_zero_point=False, adj_zero_point=2):

    filter = Filter()

    ef_time_frame = filter.filter_time_series_by_date(
        data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)
    ef_time = ef_time_frame.index

    if freq == 'minutes':
        ef_time_start = ef_time - timedelta(minutes=minute_start)
        ef_time_end = ef_time + timedelta(minutes=mins)
        ann_factor = 252 * 1440
    elif freq == 'days':
        ef_time = ef_time_frame.index.normalize()
        ef_time_start = ef_time - pandas.tseries.offsets.BusinessDay() * minute_start
        ef_time_end = ef_time + pandas.tseries.offsets.BusinessDay() * mins
        ann_factor = 252
    elif freq == 'weeks':
        ef_time = ef_time_frame.index.normalize()
        ef_time_start = ef_time - pandas.tseries.offsets.Week() * minute_start
        ef_time_end = ef_time + pandas.tseries.offsets.Week() * mins
        ann_factor = 52

    ords = list(range(-minute_start + min_offset, mins + min_offset))
    lst_ords = list(ords)

    # All data needs to be equally spaced
    if resample:
        # Make sure the time series is properly sampled at the relevant intervals
        if freq == 'minutes':
            data_frame_rets = data_frame_rets.resample('1min').last()
            data_frame_rets = data_frame_rets.fillna(value=0)
            data_frame_rets = filter.remove_out_FX_out_of_hours(data_frame_rets)
        elif freq == 'days':
            data_frame_rets = data_frame_rets.resample('B').last()
            data_frame_rets = data_frame_rets.fillna(value=0)
        elif freq == 'weeks':
            data_frame_rets = data_frame_rets.resample('W').last()
            data_frame_rets = data_frame_rets.fillna(value=0)

    start_index = data_frame_rets.index.searchsorted(ef_time_start)
    finish_index = data_frame_rets.index.searchsorted(ef_time_end)

    data_frame = pandas.DataFrame(index=ords, columns=ef_time_frame.index)

    for i in range(0, len(ef_time_frame.index)):
        vals = data_frame_rets.iloc[start_index[i]:finish_index[i]].values

        st = ef_time_start[i]

        # Add extra "future" history in case we are doing an event study which goes outside our data window
        # (will just be filled with NaN)
        if len(vals) < len(lst_ords):
            extend = numpy.zeros((len(lst_ords) - len(vals), 1)) * numpy.nan

            # If the start of the window is before we have data
            if st < data_frame_rets.index[0]:
                vals = numpy.append(extend, vals)
            # If the end of the window is after we have data
            else:
                vals = numpy.append(vals, extend)

        data_frame[ef_time_frame.index[i]] = vals

    data_frame.index.names = [None]

    if create_index:
        calculations = Calculations()

        data_frame.iloc[-minute_start + min_offset] = numpy.nan
        data_frame = calculations.create_mult_index(data_frame)
    else:
        if vol is True:
            # Annualise (if vol)
            data_frame = data_frame.rolling(center=False, window=5).std() * math.sqrt(ann_factor)
        elif cumsum:
            data_frame = data_frame.cumsum()

            # Adjust DataFrame so the zero point shows zero returns
            if adj_cumsum_zero_point:
                ind = abs(minute_start) - adj_zero_point

                for i, c in enumerate(data_frame.columns):
                    data_frame[c] = data_frame[c] - data_frame[c].values[ind]

    return data_frame
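# A hedged usage sketch (assumed inputs): 1 min returns in data_frame_rets and a DataFrame
# of custom event timestamps in ef_time_frame (eg. central bank decisions), both UTC-indexed;
# assumes EventStudy can be constructed directly
es = EventStudy()

event_moves = es.get_intraday_moves_over_custom_event(
    data_frame_rets, ef_time_frame,
    minute_start=60, mins=3 * 60, resample=True, cumsum=True)

# each column is one event; each row is minutes relative to the event time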
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform basic event studies for market data
    around these events. Note, requires a file of input of the following form (transposed as columns!) - we give
    an example for NFP released on 7 Feb 2003 (note that release-date-time-full need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date                   31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close                  xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release         143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average         xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high            xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low             xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations    xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision         185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date    20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt             20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full 08/01/1999 13:30
    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    # manual offset for certain events where Bloomberg/data vendor displays the wrong date
    # (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt': 1}

    def __init__(self, df=None):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        self._econ_data_frame = self.speed_cache.get_dataframe(self._db_database_econ_file)

        if self._econ_data_frame is None:
            # self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(self._hdf5_file_econ_file)
            self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(
                self._db_database_econ_file,
                engine=marketconstants.write_engine,
                db_server=marketconstants.db_server,
                db_port=marketconstants.db_port,
                username=marketconstants.db_username,
                password=marketconstants.db_password)

            self.speed_cache.put_dataframe(self._db_database_econ_file, self._econ_data_frame)

    def harvest_category(self, category_name):
        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates=True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")
        self.filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event=None, csv=None):
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event=None):
        # acceptable fields
        #   observation-date <- observation time for the index
        #   actual-release
        #   survey-median
        #   survey-average
        #   survey-high
        #   survey-low
        #   number-observations
        #   release-dt
        #   release-date-time-full
        #   first-revision
        #   first-revision-date

        ticker = []

        # construct tickers of the form USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # create dates for the join later
        date_time_dt = [datetime.datetime(date_time_fore[x].year,
                                          date_time_fore[x].month,
                                          date_time_fore[x].day)
                        for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event fields
        self._econ_data_frame[name + ".observation-date"] = self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]

        data_frame.index = self._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]              # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]   # eliminate any NaN dates (artifact of Excel)
        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
            int((ind_dt[x] - (ind_dt[x] % 10000)) / 10000),
            int(((ind_dt[x] % 10000) - (ind_dt[x] % 100)) / 100),
            int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-times/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field
    def get_all_economic_events_date_time(self):
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        # collect rows and construct the DataFrame in one go
        # (DataFrame.append is removed in recent pandas)
        rows = []

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                rows.append({'event-name': event, 'release-date-time-full': time})

        return pandas.DataFrame(rows, columns=columns)

    def get_all_economic_events(self):
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # sort the list alphabetically (and remove any duplicates)
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end, lagged=False,
                                                     NYC_cutoff=10):
        # get the times of the events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self).get_economic_event_ret_over_custom_event_day(
            data_frame_in, event_dates, name, event, start, end, lagged=lagged, NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised=False):
        return self.get_economic_event_ret_over_custom_event_day(
            vol_in, name, event, start, end, lagged=realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol,
                                      mins=3 * 60, min_offset=0, create_index=False, resample=False,
                                      freq='minutes'):
        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(data_frame_rets, ef_time_frame, vol,
                                                         mins=mins, min_offset=min_offset,
                                                         create_index=create_index, resample=resample,
                                                         freq=freq)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name,
                                                       start, end, offset_list=[1, 5, 30, 60],
                                                       add_surprise=False, surprise_field='survey-average'):
        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
            offset_list=offset_list, add_surprise=add_surprise, surprise_field=surprise_field)
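# A hedged usage sketch: intraday EURUSD moves around US NFP, assuming 1 min returns
# in data_frame_rets (the event name follows the docstring example above)
ef = EventsFactory()

moves = ef.get_intraday_moves_over_event(
    data_frame_rets, 'EURUSD',
    'USD', 'US Employees on Nonfarm Payrolls Total MoM Net Change SA',
    '01 Jan 2007', '01 Jan 2017', vol=False, mins=3 * 60, resample=True)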
md_request = MarketDataRequest(start_date=start_date, finish_date=finish_date,
                               data_source='bloomberg', cut='BGN',
                               category='fx-vol-market', tickers=cross,
                               fx_vol_tenor=['1W', '1M', '3M'],
                               cache_algo='cache_algo_return',
                               base_depos_currencies=[cross[0:3], cross[3:6]])

df_vol_market = market.fetch_market(md_request)
df_vol_market = df_vol_market.ffill()

# Remove New Year's Day and Christmas
df_vol_market = Filter().filter_time_series_by_holidays(df_vol_market, cal='FX')

# Get a total return index for trading spot
# This way we can take into account carry when calculating the delta hedging P&L
md_request = MarketDataRequest(start_date=start_date, finish_date=finish_date,
                               data_source='bloomberg', cut='NYC',
                               category='fx-tot', tickers=cross,
                               cache_algo='cache_algo_return')

df_tot = market.fetch_market(md_request)

df_vol_market = df_vol_market.join(df_tot, how='left')
df_vol_market = df_vol_market.ffill()
def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods=60,
                              vol_obs_in_year=252, vol_rebalance_freq='BM', data_resample_freq=None,
                              data_resample_type='mean', returns=True, period_shift=0):
    """Calculates the time series of leverage for a specified vol target

    Parameters
    ----------
    returns_df : DataFrame
        Asset returns
    vol_target : float
        vol target for assets
    vol_max_leverage : float
        maximum leverage allowed
    vol_periods : int
        number of periods to calculate volatility
    vol_obs_in_year : int
        number of observations in the year
    vol_rebalance_freq : str
        how often to rebalance
    data_resample_freq : str
        do we need to resample the underlying data first? (eg. have we got intraday data?)
    returns : boolean
        is this a returns time series or prices?
    period_shift : int
        should we delay the signal by a number of periods?

    Returns
    -------
    pandas.DataFrame
    """
    calculations = Calculations()
    filter = Filter()

    if data_resample_freq is not None:
        return  # TODO not implemented yet

    if not returns:
        returns_df = calculations.calculate_returns(returns_df)

    roll_vol_df = calculations.rolling_volatility(returns_df, periods=vol_periods,
                                                  obs_in_year=vol_obs_in_year).shift(period_shift)

    # calculate the leverage as a function of vol target (with a max leverage constraint)
    lev_df = vol_target / roll_vol_df
    lev_df[lev_df > vol_max_leverage] = vol_max_leverage

    lev_df = filter.resample_time_series_frequency(lev_df, vol_rebalance_freq, data_resample_type)

    returns_df, lev_df = returns_df.align(lev_df, join='left', axis=0)

    lev_df = lev_df.ffill()
    lev_df.iloc[0:vol_periods] = numpy.nan  # ignore the first elements before the vol window kicks in

    return lev_df
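# A hedged usage sketch, assuming this method sits on the RiskEngine used earlier and
# asset_returns_df holds daily asset returns; targets 10 vol, capped at 5x leverage
# (vol_target should be in the same units as rolling_volatility's output)
lev_df = risk_engine.calculate_leverage_factor(
    asset_returns_df, vol_target=10.0, vol_max_leverage=5.0,
    vol_periods=60, vol_obs_in_year=252, vol_rebalance_freq='BM')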
md_request = MarketDataRequest(start_date=start_date, finish_date=finish_date,
                               data_source='bloomberg', cut='BGN',
                               category='fx-vol-market', tickers=cross,
                               fx_vol_tenor=['1W', '1M', '3M'],
                               cache_algo='cache_algo_return',
                               base_depos_currencies=[cross[0:3], cross[3:6]])

df = market.fetch_market(md_request)
df = df.ffill()

# Remove New Year's Day and Christmas
df = Filter().filter_time_series_by_holidays(df, cal='FX')

# We want to roll a long 1M ATM call at expiry
# We'll mark to market the price through the month by interpolating between 1W and 1M
# (and using the whole vol curve at each tenor)
fx_options_curve = FXOptionsCurve(
    fx_options_trading_tenor=fx_options_trading_tenor,
    roll_days_before=0,
    roll_event='expiry-date',
    roll_months=1,
    fx_options_tenor_for_interpolation=['1W', '1M'],
    strike='atm',
    contract_type='european-call',
    depo_tenor_for_option='1M',
    position_multiplier=1.0,
    output_calculation_fields=True)
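# A hedged follow-up: build the option strategy's total return index from the vol
# market data above; construct_total_return_index is assumed to be available on
# recent finmarketpy versions of FXOptionsCurve
df_tri = fx_options_curve.construct_total_return_index(cross, df)

print(df_tri.tail())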
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform basic event studies for market data
    around these events. Note, requires a file of input of the following (transposed as columns!) - we give an
    example for NFP released on 7 Feb 2003 (note that release-date-time-full need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date                    31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close                   xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release          143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median           xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high             xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low              xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1           xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations     xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision          185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date     20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt              20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full  08/01/1999 13:30
    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    ### manual offset for certain events where Bloomberg/data vendor displays the wrong date
    ### (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt': 1}

    def __init__(self, df=None):
        super(EventsFactory, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        # self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(self._hdf5_file_econ_file)
        self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(
            self._db_database_econ_file, engine=MarketConstants().write_engine,
            db_server=MarketConstants().db_server,
            username=MarketConstants().db_username,
            password=MarketConstants().db_password)

    def harvest_category(self, category_name):
        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO allow merge of multiple sources
            return data_frame

    def get_economic_events(self):
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates=True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")
        data_frame = self.filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event=None, csv=None):
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event=None):
        ### acceptable fields
        # observation-date <- observation time for the index
        # actual-release
        # survey-median
        # survey-average
        # survey-high
        # survey-low
        # survey-high
        # number-observations
        # release-dt
        # release-date-time-full
        # first-revision
        # first-revision-date

        ticker = []

        # construct tickers of the form USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)

        date_time_fore = event_date_time.index

        # create dates for join later
        date_time_dt = [datetime.datetime(date_time_fore[x].year,
                                          date_time_fore[x].month,
                                          date_time_fore[x].day) for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        self._econ_data_frame[name + ".observation-date"] = self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]

        data_frame.index = self._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]  # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]  # eliminate any NaN dates (artifact of Excel)

        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
            int((ind_dt[x] - (ind_dt[x] % 10000)) / 10000),
            int(((ind_dt[x] % 10000) - (ind_dt[x] % 100)) / 100),
            int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        data_frame = pandas.DataFrame(data=numpy.zeros((0, len(columns))), columns=columns)

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                data_frame = data_frame.append({'event-name': event,
                                                'release-date-time-full': time},
                                               ignore_index=True)

        return data_frame

    def get_all_economic_events(self):
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # sort list alphabetically (and remove any duplicates)
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end,
                                                     lagged=False, NYC_cutoff=10):

        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self).get_economic_event_ret_over_custom_event_day(
            data_frame_in, event_dates, name, event, start, end,
            lagged=lagged, NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised=False):

        return self.get_economic_event_ret_over_custom_event_day(
            vol_in, name, event, start, end, lagged=realised)

        # return super(EventsFactory, self).get_economic_event_ret_over_event_day(vol_in, name, event, start, end, lagged = realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol,
                                      mins=3 * 60, min_offset=0, create_index=False, resample=False, freq='minutes'):

        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(
            data_frame_rets, ef_time_frame, vol,
            mins=mins, min_offset=min_offset, create_index=create_index,
            resample=resample, freq=freq)  # , start, end)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name,
                                                       start, end, offset_list=[1, 5, 30, 60], add_surprise=False,
                                                       surprise_field='survey-average'):

        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
            offset_list=offset_list, add_surprise=add_surprise, surprise_field=surprise_field)
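# A hedged usage sketch for EventsFactory: the NFP ticker mirrors the docstring
# example above, and passing a pre-loaded DataFrame (my_econ_df, a placeholder)
# sidesteps the HDF5/database dependency.
ef = EventsFactory(df=my_econ_df)

# all release timestamps for NFP
nfp_times = ef.get_economic_event_date_time(
    'USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA')

# actual releases alongside survey medians, indexed by release date
nfp_fields = ef.get_economic_event_date_time_fields(
    ['actual-release', 'survey-median'],
    'USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA')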
class MarketDataGenerator(object):
    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1

        return

    def flush_cache(self):
        """ flush_cache - Flushes the internal cache of time series """
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_data_vendor(self, source):
        """ get_data_vendor - Loads the appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.
            we can also have forms like "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """
        data_vendor = None

        source = source.split("-")[0]

        if source == 'bloomberg':
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()
        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()
        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()
        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()
        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()
        elif source == 'alfred':
            from findatapy.market.datavendorweb import DataVendorALFRED
            data_vendor = DataVendorALFRED()
        elif source in ['yahoo', 'google', 'fred', 'oecd', 'eurostat', 'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """ fetch_market_data - Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        tickers = market_data_request.tickers
        data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            market_data_request.tickers = ConfigManager().get_instance().get_tickers_list_for_category(
                market_data_request.category, market_data_request.data_source,
                market_data_request.freq, market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if (market_data_request.freq in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(market_data_request, data_vendor)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request, data_vendor)

        if ('internet_load' in market_data_request.cache_algo):
            self.logger.debug("Internet loading...")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if (market_data_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            try:
                return self.filter.filter_time_series(market_data_request, data_frame_agg, pad_columns=True)
            except:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_market_data_cached(self, market_data_request):
        """ get_market_data_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        if (market_data_request.freq == "intraday"):
            ticker = market_data_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(market_data_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            return self.filter.filter_time_series(market_data_request, data_frame)

        return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """ create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(self.create_category_key(market_data_request, ticker))

    def download_intraday_tick(self, market_data_request, data_vendor):
        """ download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(market_data_request.tickers) == 1 or DataConstants().market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = data_vendor.load_ticker(market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        # if data_frame_agg is not None:
                        #     data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        # else:
                        #     data_frame_agg = data_frame_single

                # key = self.create_category_key(market_data_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            # if you call for returning multiple tickers, be careful with memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calculations.pandas_outer_join(data_frame_group)

            return data_frame_agg

        else:
            market_data_request_list = []

            # create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if hasattr(market_data_request, 'vendor_tickers'):
                    market_data_request_single.vendor_tickers = [market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):
        data_frame_single = self.get_data_vendor(market_data_request.data_source).load_ticker(market_data_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which include dates
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except:
                    pass

                if market_data_request.freq == "second":
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):
        data_frame_agg = None

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series, market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            if data_frame_group is not None:
                data_frame_agg = self.calculations.pandas_outer_join(data_frame_group)

        return data_frame_agg

    def download_daily(self, market_data_request, data_vendor):
        """ download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # daily data does not include ticker in the key, as multiple tickers in the same file
        if DataConstants().market_thread_no['other'] == 1:
            data_frame_agg = data_vendor.load_ticker(market_data_request)
        else:
            market_data_request_list = []

            # guard against a zero or negative group size (eg. when there are fewer tickers
            # than threads), which would otherwise break the loop below
            group_size = max(int(len(market_data_request.tickers) / DataConstants().market_thread_no['other'] - 1), 0)

            if group_size == 0:
                group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            data_frame_agg = self.fetch_group_time_series(market_data_request_list)

        key = self.create_category_key(market_data_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, market_data_request, ticker=None):
        """ create_category_key - Returns a category key for the associated MarketDataRequest

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker,
            start & finish date etc.

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None:
            cut = market_data_request.cut

        if (ticker is not None):
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
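# A minimal, hedged sketch of driving MarketDataGenerator directly (the Quandl
# ticker/vendor_ticker values are placeholders; in practice this class is
# usually called via a higher-level Market object).
md_request = MarketDataRequest(start_date='01 Jan 2015', finish_date='01 Jan 2016',
                               data_source='quandl', freq='daily',
                               tickers=['EURUSD'], vendor_tickers=['ECB/EURUSD'],
                               fields=['close'], cache_algo='internet_load_return')

df = MarketDataGenerator().fetch_market_data(md_request)
print(df.tail())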
def get_intraday_moves_over_custom_event(self, data_frame_rets, ef_time_frame, vol=False,
                                         minute_start=5, mins=3 * 60, min_offset=0,
                                         create_index=False, resample=False, freq='minutes'):
    filter = Filter()

    ef_time_frame = filter.filter_time_series_by_date(
        data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)
    ef_time = ef_time_frame.index

    if freq == 'minutes':
        ef_time_start = ef_time - timedelta(minutes=minute_start)
        ef_time_end = ef_time + timedelta(minutes=mins)
        ann_factor = 252 * 1440
    elif freq == 'days':
        ef_time = ef_time_frame.index.normalize()
        ef_time_start = ef_time - timedelta(days=minute_start)
        ef_time_end = ef_time + timedelta(days=mins)
        ann_factor = 252

    ords = range(-minute_start + min_offset, mins + min_offset)

    # all data needs to be equally spaced
    if resample:
        # make sure time series is properly sampled at 1 min intervals
        data_frame_rets = data_frame_rets.resample('1min')
        data_frame_rets = data_frame_rets.fillna(value=0)
        data_frame_rets = filter.remove_out_FX_out_of_hours(data_frame_rets)

    data_frame_rets['Ind'] = numpy.nan

    start_index = data_frame_rets.index.searchsorted(ef_time_start)
    finish_index = data_frame_rets.index.searchsorted(ef_time_end)

    # not all observation windows will be the same length (eg. last one?)
    # fill the indices which represent minutes
    # TODO vectorise this!
    ind_col = data_frame_rets.columns.get_loc('Ind')  # .ix has been removed from pandas, so go via column position

    for i in range(0, len(ef_time_frame.index)):
        try:
            data_frame_rets.iloc[start_index[i]:finish_index[i], ind_col] = ords
        except:
            data_frame_rets.iloc[start_index[i]:finish_index[i], ind_col] = \
                ords[0:(finish_index[i] - start_index[i])]

    # set the release dates (column holds timestamps at entry points and 0 at exit points, hence object dtype)
    data_frame_rets['Rel'] = numpy.nan
    data_frame_rets['Rel'] = data_frame_rets['Rel'].astype(object)
    rel_col = data_frame_rets.columns.get_loc('Rel')

    data_frame_rets.iloc[start_index, rel_col] = ef_time  # set entry points
    data_frame_rets.iloc[finish_index + 1, rel_col] = numpy.zeros(len(start_index))  # set exit points
    data_frame_rets['Rel'] = data_frame_rets['Rel'].fillna(method='pad')  # fill down signals

    data_frame_rets = data_frame_rets[pandas.notnull(data_frame_rets['Ind'])]  # get rid of other

    data_frame = data_frame_rets.pivot(index='Ind', columns='Rel', values=data_frame_rets.columns[0])

    data_frame.index.names = [None]

    if create_index:
        calculations = Calculations()
        data_frame.loc[-minute_start + min_offset, :] = numpy.nan
        data_frame = calculations.create_mult_index(data_frame)
    else:
        if vol is True:
            # annualise (if vol)
            data_frame = data_frame.rolling(center=False, window=5).std() * math.sqrt(ann_factor)
        else:
            data_frame = data_frame.cumsum()

    return data_frame
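# A hedged sketch of running the event study above through EventsFactory's
# wrapper (eurusd_1min_rets, start and end are placeholder inputs; the event
# name follows the NFP convention used elsewhere in this file).
events_factory = EventsFactory()

moves = events_factory.get_intraday_moves_over_event(
    eurusd_1min_rets, 'EURUSD', 'USD',
    'US Employees on Nonfarm Payrolls Total MoM Net Change SA',
    start, end, vol=False, mins=3 * 60, create_index=True, resample=True)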
def __init__(self, market_data_generator=None,
             fx_options_trading_tenor=market_constants.fx_options_trading_tenor,
             roll_days_before=market_constants.fx_options_roll_days_before,
             roll_event=market_constants.fx_options_roll_event,
             construct_via_currency='no',
             fx_options_tenor_for_interpolation=market_constants.fx_options_tenor_for_interpolation,
             base_depos_tenor=data_constants.base_depos_tenor,
             roll_months=market_constants.fx_options_roll_months,
             cum_index=market_constants.fx_options_cum_index,
             strike=market_constants.fx_options_index_strike,
             contract_type=market_constants.fx_options_index_contract_type,
             premium_output=market_constants.fx_options_index_premium_output,
             position_multiplier=1,
             depo_tenor_for_option=market_constants.fx_options_depo_tenor,
             freeze_implied_vol=market_constants.fx_options_freeze_implied_vol,
             tot_label='',
             cal=None,
             output_calculation_fields=market_constants.output_calculation_fields):
    """Initializes FXOptionsCurve

    Parameters
    ----------
    market_data_generator : MarketDataGenerator
        Used for downloading market data

    fx_options_trading_tenor : str
        What is the primary options contract being used to trade (default - '1M')

    roll_days_before : int
        Number of days before roll event to enter into a new options contract

    roll_event : str
        What constitutes a roll event? ('month-end', 'quarter-end', 'year-end', 'expiry-date')

    cum_index : str
        In total return index, do we compute in additive or multiplicative way ('add' or 'mult')

    construct_via_currency : str
        What currency should we construct the options via? Eg. if we asked for AUDJPY we can construct it via
        AUDUSD & JPYUSD options, as opposed to AUDJPY options (default - 'no')

    fx_options_tenor_for_interpolation : str(list)
        Which tenors should we use for interpolation

    base_depos_tenor : str(list)
        Which base deposit tenors do we need (this is only necessary if we want to start inferring depos)

    roll_months : int
        After how many months should we initiate a roll. Typically for trading 1M this should be 1,
        3M this should be 3 etc.

    tot_label : str
        Postfix for the total returns field

    cal : str
        Calendar to use for expiry (if None, uses that of FX pair)

    output_calculation_fields : bool
        Also output additional data such as option expiries etc. alongside the total return indices
    """
    self._market_data_generator = market_data_generator
    self._calculations = Calculations()
    self._calendar = Calendar()
    self._filter = Filter()

    self._fx_options_trading_tenor = fx_options_trading_tenor
    self._roll_days_before = roll_days_before
    self._roll_event = roll_event
    self._construct_via_currency = construct_via_currency
    self._fx_options_tenor_for_interpolation = fx_options_tenor_for_interpolation
    self._base_depos_tenor = base_depos_tenor
    self._roll_months = roll_months
    self._cum_index = cum_index
    self._contract_type = contract_type
    self._strike = strike
    self._premium_output = premium_output

    self._position_multiplier = position_multiplier
    self._depo_tenor_for_option = depo_tenor_for_option

    self._freeze_implied_vol = freeze_implied_vol

    self._tot_label = tot_label
    self._cal = cal

    self._output_calculation_fields = output_calculation_fields
#

if __name__ == "__main__":
    ###### below line CRUCIAL when running Windows, otherwise multiprocessing
    # doesn't work! (not necessary on Linux)
    from findatapy.util import SwimPool; SwimPool()

    from findatapy.timeseries import Filter, Calendar, Calculations

    import pandas as pd

    calculations = Calculations()
    calendar = Calendar()
    filter = Filter()

    # choose run_example = 0 for everything
    # run_example = 1 - combine intraday dataframe with daily data dataframe

    run_example = 0

    if run_example == 1 or run_example == 0:
        df_intraday = pd.DataFrame(index=pd.date_range(start="01 Jan 2020", end="10 Jan 2020", freq="1min"),
                                   columns=["ones"])
        df_intraday["ones"] = 1
        df_intraday = df_intraday.tz_localize("utc")
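        # Hedged continuation of this example: one plausible way to combine the
        # 1-minute frame with a daily frame (df_daily and the ffill-reindex join
        # are illustrative assumptions, not taken from the library):
        df_daily = pd.DataFrame(index=pd.date_range(start="01 Jan 2020", end="10 Jan 2020", freq="B"),
                                columns=["twos"])
        df_daily["twos"] = 2
        df_daily = df_daily.tz_localize("utc")

        # upsample the daily observations onto the intraday index, then join
        df_combined = df_intraday.join(df_daily.reindex(df_intraday.index, method="ffill"), how="left")

        print(df_combined.head())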
# limitations under the License.
#

if __name__ == "__main__":
    ###### below line CRUCIAL when running Windows, otherwise multiprocessing
    # doesn't work! (not necessary on Linux)
    from findatapy.util import SwimPool; SwimPool()

    from findatapy.timeseries import Filter, Calendar

    import pandas as pd

    calendar = Calendar()
    filter = Filter()

    # choose run_example = 0 for everything
    # run_example = 1 - get holidays for FX, EUR and EURUSD, as well as
    #   listing weekends
    # run_example = 2 - get FX delivery dates and FX option expiries for
    #   various tenors
    # run_example = 3 - get number of days between pandas DatetimeIndex
    # run_example = 4 - filter time series by EURUSD holidays
    # run_example = 5 - option expiries for USDJPY

    run_example = 0

    if run_example == 1 or run_example == 0:
        # Get the holidays (which aren't weekends)
        print(calendar.get_holidays(start_date="01 Jan 1999 00:50", cal="FX"))
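    # A hedged sketch in the spirit of run_example = 2 (assumption: Calendar's
    # get_delivery_date_from_horizon_date accepts a single Timestamp, as the
    # forwards curve code later in this file uses it; date and tenor are
    # placeholders):
    if run_example == 2 or run_example == 0:
        # FX delivery date for a 1M EURUSD trade entered on 2 Mar 2020
        print(calendar.get_delivery_date_from_horizon_date(
            pd.Timestamp("02 Mar 2020"), "1M", cal="EURUSD", asset_class="fx"))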
class FXForwardsCurve(object):
    """Constructs continuous forwards time series total return indices from underlying forwards contracts.
    """

    def __init__(self, market_data_generator=None,
                 fx_forwards_trading_tenor=market_constants.fx_forwards_trading_tenor,
                 roll_days_before=market_constants.fx_forwards_roll_days_before,
                 roll_event=market_constants.fx_forwards_roll_event,
                 construct_via_currency='no',
                 fx_forwards_tenor_for_interpolation=market_constants.fx_forwards_tenor_for_interpolation,
                 base_depos_tenor=data_constants.base_depos_tenor,
                 roll_months=market_constants.fx_forwards_roll_months,
                 output_calculation_fields=market_constants.output_calculation_fields):
        """Initializes FXForwardsCurve

        Parameters
        ----------
        market_data_generator : MarketDataGenerator
            Used for downloading market data

        fx_forwards_trading_tenor : str
            What is the primary forward contract being used to trade (default - '1M')

        roll_days_before : int
            Number of days before roll event to enter into a new forwards contract

        roll_event : str
            What constitutes a roll event? ('month-end', 'quarter-end', 'year-end', 'delivery-date')

        construct_via_currency : str
            What currency should we construct the forward via? Eg. if we asked for AUDJPY we can construct it via
            AUDUSD & JPYUSD forwards, as opposed to AUDJPY forwards (default - 'no')

        fx_forwards_tenor_for_interpolation : str(list)
            Which forwards should we use for interpolation

        base_depos_tenor : str(list)
            Which base deposit tenors do we need (this is only necessary if we want to start inferring depos)

        roll_months : int
            After how many months should we initiate a roll. Typically for trading 1M this should be 1,
            3M this should be 3 etc.

        output_calculation_fields : bool
            Also output additional data such as forward expiries etc. alongside the total return indices
        """
        self._market_data_generator = market_data_generator
        self._calculations = Calculations()
        self._calendar = Calendar()
        self._filter = Filter()

        self._fx_forwards_trading_tenor = fx_forwards_trading_tenor
        self._roll_days_before = roll_days_before
        self._roll_event = roll_event

        self._construct_via_currency = construct_via_currency
        self._fx_forwards_tenor_for_interpolation = fx_forwards_tenor_for_interpolation
        self._base_depos_tenor = base_depos_tenor

        self._roll_months = roll_months
        self._output_calculation_fields = output_calculation_fields

    def generate_key(self):
        from findatapy.market.ioengine import SpeedCache

        # Don't include any "large" objects in the key
        return SpeedCache().generate_key(self, ['_market_data_generator', '_calculations', '_calendar'])

    def fetch_continuous_time_series(self, md_request, market_data_generator, fx_forwards_trading_tenor=None,
                                     roll_days_before=None, roll_event=None, construct_via_currency=None,
                                     fx_forwards_tenor_for_interpolation=None, base_depos_tenor=None,
                                     roll_months=None, output_calculation_fields=False):

        if market_data_generator is None:
            market_data_generator = self._market_data_generator

        if fx_forwards_trading_tenor is None:
            fx_forwards_trading_tenor = self._fx_forwards_trading_tenor

        if roll_days_before is None:
            roll_days_before = self._roll_days_before

        if roll_event is None:
            roll_event = self._roll_event

        if construct_via_currency is None:
            construct_via_currency = self._construct_via_currency

        if fx_forwards_tenor_for_interpolation is None:
            fx_forwards_tenor_for_interpolation = self._fx_forwards_tenor_for_interpolation

        if base_depos_tenor is None:
            base_depos_tenor = self._base_depos_tenor

        if roll_months is None:
            roll_months = self._roll_months

        if output_calculation_fields is None:
            output_calculation_fields = self._output_calculation_fields

        # Eg. we construct EURJPY via EURJPY directly (note: would need to have sufficient forward data for this)
        if construct_via_currency == 'no':
            # Download FX spot, FX forwards points and base depos etc.
            market = Market(market_data_generator=market_data_generator)

            md_request_download = MarketDataRequest(md_request=md_request)

            fx_conv = FXConv()

            # CAREFUL: convert the tickers to correct notation, eg. USDEUR => EURUSD, because our data
            # should be fetched in correct convention
            md_request_download.tickers = [fx_conv.correct_notation(x) for x in md_request.tickers]
            md_request_download.category = 'fx-forwards-market'
            md_request_download.fields = 'close'
            md_request_download.abstract_curve = None
            md_request_download.fx_forwards_tenor = fx_forwards_tenor_for_interpolation
            md_request_download.base_depos_tenor = base_depos_tenor

            forwards_market_df = market.fetch_market(md_request_download)

            # Now use the original tickers
            return self.construct_total_return_index(
                md_request.tickers, forwards_market_df,
                fx_forwards_trading_tenor=fx_forwards_trading_tenor,
                roll_days_before=roll_days_before,
                roll_event=roll_event,
                fx_forwards_tenor_for_interpolation=fx_forwards_tenor_for_interpolation,
                roll_months=roll_months,
                output_calculation_fields=output_calculation_fields)
        else:
            # eg. we calculate via your domestic currency such as USD, so returns will be in your domestic currency
            # Hence AUDJPY would be calculated via AUDUSD and JPYUSD (subtracting the difference in returns)
            total_return_indices = []

            for tick in md_request.tickers:
                base = tick[0:3]
                terms = tick[3:6]

                md_request_base = MarketDataRequest(md_request=md_request)
                md_request_base.tickers = base + construct_via_currency

                md_request_terms = MarketDataRequest(md_request=md_request)
                md_request_terms.tickers = terms + construct_via_currency

                # Construct the base and terms separately (ie. AUDJPY => AUDUSD & JPYUSD)
                base_vals = self.fetch_continuous_time_series(
                    md_request_base, market_data_generator,
                    fx_forwards_trading_tenor=fx_forwards_trading_tenor,
                    roll_days_before=roll_days_before, roll_event=roll_event,
                    fx_forwards_tenor_for_interpolation=fx_forwards_tenor_for_interpolation,
                    base_depos_tenor=base_depos_tenor, roll_months=roll_months,
                    output_calculation_fields=False, construct_via_currency='no')

                terms_vals = self.fetch_continuous_time_series(
                    md_request_terms, market_data_generator,
                    fx_forwards_trading_tenor=fx_forwards_trading_tenor,
                    roll_days_before=roll_days_before, roll_event=roll_event,
                    fx_forwards_tenor_for_interpolation=fx_forwards_tenor_for_interpolation,
                    base_depos_tenor=base_depos_tenor, roll_months=roll_months,
                    output_calculation_fields=False, construct_via_currency='no')

                # Special case for USDUSD (and for when base or terms is the same as the construction currency)
                if base + terms == construct_via_currency + construct_via_currency:
                    base_rets = self._calculations.calculate_returns(base_vals)
                    cross_rets = pd.DataFrame(0, index=base_rets.index, columns=base_rets.columns)
                elif base + construct_via_currency == construct_via_currency + construct_via_currency:
                    cross_rets = -self._calculations.calculate_returns(terms_vals)
                elif terms + construct_via_currency == construct_via_currency + construct_via_currency:
                    cross_rets = self._calculations.calculate_returns(base_vals)
                else:
                    base_rets = self._calculations.calculate_returns(base_vals)
                    terms_rets = self._calculations.calculate_returns(terms_vals)

                    cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # First return of a time series will be NaN, given we don't know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self._calculations.create_mult_index(cross_rets)
                cross_vals.columns = [tick + '-forward-tot.close']

                total_return_indices.append(cross_vals)

            return self._calculations.pandas_outer_join(total_return_indices)

    def unhedged_asset_fx(self, assets_df, asset_currency, home_curr, start_date, finish_date, spot_df=None):
        pass

    def hedged_asset_fx(self, assets_df, asset_currency, home_curr, start_date, finish_date, spot_df=None,
                        total_return_indices_df=None):
        pass

    def get_day_count_conv(self, currency):
        if currency in market_constants.currencies_with_365_basis:
            return 365.0

        return 360.0

    def construct_total_return_index(self, cross_fx, forwards_market_df,
                                     fx_forwards_trading_tenor=market_constants.fx_forwards_trading_tenor,
                                     roll_days_before=market_constants.fx_forwards_roll_days_before,
                                     roll_event=market_constants.fx_forwards_roll_event,
                                     roll_months=1,
                                     fx_forwards_tenor_for_interpolation=market_constants.fx_forwards_tenor_for_interpolation,
                                     output_calculation_fields=False):

        if not (isinstance(cross_fx, list)):
            cross_fx = [cross_fx]

        total_return_index_agg = []

        # Remove columns where there is no data (because these points typically aren't quoted)
        forwards_market_df = forwards_market_df.dropna(how='all', axis=1)

        fx_forwards_pricer = FXForwardsPricer()

        def get_roll_date(horizon_d, delivery_d, asset_hols, month_adj=1):
            if roll_event == 'month-end':
                roll_d = horizon_d + CustomBusinessMonthEnd(roll_months + month_adj, holidays=asset_hols)
            elif roll_event == 'delivery-date':
                roll_d = delivery_d

            return (roll_d - CustomBusinessDay(n=roll_days_before, holidays=asset_hols))

        for cross in cross_fx:
            # Eg. if we specify USDUSD
            if cross[0:3] == cross[3:6]:
                total_return_index_agg.append(
                    pd.DataFrame(100, index=forwards_market_df.index, columns=[cross + "-forward-tot.close"]))
            else:
                # Is the FX cross in the correct convention?
                old_cross = cross
                cross = FXConv().correct_notation(cross)

                horizon_date = forwards_market_df.index

                delivery_date = []
                roll_date = []

                new_trade = np.full(len(horizon_date), False, dtype=bool)

                asset_holidays = self._filter.get_holidays(cal=cross)

                # Get first delivery date
                delivery_date.append(
                    self._calendar.get_delivery_date_from_horizon_date(
                        horizon_date[0], fx_forwards_trading_tenor, cal=cross, asset_class='fx')[0])

                # For the first month we want it to expire within that month (for consistency),
                # hence month_adj=0 ONLY here
                roll_date.append(get_roll_date(horizon_date[0], delivery_date[0], asset_holidays, month_adj=0))

                # New trade => entry at beginning AND on every roll
                new_trade[0] = True

                # Get all the delivery dates and roll dates
                # At each "roll/trade" day we need to reset them for the new contract
                for i in range(1, len(horizon_date)):

                    # If the horizon date has reached the roll date (from yesterday), we have a new roll/trade
                    if (horizon_date[i] - roll_date[i - 1]).days == 0:
                        new_trade[i] = True
                    # else:
                    #     new_trade[i] = False

                    # If we're entering a new trade/contract, we need to get new delivery and roll dates
                    if new_trade[i]:
                        delivery_date.append(
                            self._calendar.get_delivery_date_from_horizon_date(
                                horizon_date[i], fx_forwards_trading_tenor, cal=cross, asset_class='fx')[0])

                        roll_date.append(get_roll_date(horizon_date[i], delivery_date[i], asset_holidays))
                    else:
                        # Otherwise use the previous delivery and roll dates, because we're still holding
                        # the same contract
                        delivery_date.append(delivery_date[i - 1])
                        roll_date.append(roll_date[i - 1])

                interpolated_forward = fx_forwards_pricer.price_instrument(
                    cross, horizon_date, delivery_date, market_df=forwards_market_df,
                    fx_forwards_tenor_for_interpolation=fx_forwards_tenor_for_interpolation)[
                    cross + '-interpolated-outright-forward.close'].values

                # To record MTM prices
                mtm = np.copy(interpolated_forward)

                # Note: may need to add discount factor to forwards?

                # Special case: for very first trading day
                # mtm[0] = interpolated_forward[0]

                # On rolling dates, MTM will be the previous forward contract (interpolated)
                # otherwise it will be the current forward contract
                for i in range(1, len(horizon_date)):
                    if new_trade[i]:
                        mtm[i] = fx_forwards_pricer.price_instrument(
                            cross, horizon_date[i], delivery_date[i - 1], market_df=forwards_market_df,
                            fx_forwards_tenor_for_interpolation=fx_forwards_tenor_for_interpolation)[
                            cross + '-interpolated-outright-forward.close'].values
                    # else:
                    #     mtm[i] = interpolated_forward[i]

                # Eg. if we asked for USDEUR, we first constructed spot/forwards for EURUSD
                # and then need to invert it
                if old_cross != cross:
                    mtm = 1.0 / mtm
                    interpolated_forward = 1.0 / interpolated_forward

                cum_rets = 100 * np.cumprod(1.0 + mtm / np.roll(interpolated_forward, 1) - 1.0)

                total_return_index = pd.DataFrame(index=horizon_date, columns=[cross + "-forward-tot.close"])
                total_return_index[cross + "-forward-tot.close"] = cum_rets

                if output_calculation_fields:
                    total_return_index[cross + '-interpolated-outright-forward.close'] = interpolated_forward
                    total_return_index[cross + '-mtm.close'] = mtm
                    total_return_index[cross + '-roll.close'] = new_trade
                    total_return_index[cross + '.roll-date'] = roll_date
                    total_return_index[cross + '.delivery-date'] = delivery_date

                total_return_index_agg.append(total_return_index)

        return self._calculations.pandas_outer_join(total_return_index_agg)
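# A hedged usage sketch for FXForwardsCurve: the fetch_continuous_time_series
# call follows the signature defined above, but the dates, ticker and the
# MarketDataGenerator wiring are illustrative assumptions.
fx_forwards_curve = FXForwardsCurve(fx_forwards_trading_tenor='1M',
                                    roll_event='delivery-date',
                                    fx_forwards_tenor_for_interpolation=['1W', '1M'])

md_request = MarketDataRequest(start_date='01 Jan 2020', finish_date='01 Jun 2020',
                               data_source='bloomberg', cut='NYC', category='fx',
                               tickers=['EURUSD'])

df_tot = fx_forwards_curve.fetch_continuous_time_series(md_request, MarketDataGenerator(),
                                                        output_calculation_fields=True)
print(df_tot.tail())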