def __init__(self):
    """Initialise helper objects and ensure the shared economic event data is loaded.

    The economic event DataFrame is cached at class level on
    ``EventsFactory._econ_data_frame``; the (potentially slow) load only
    happens when that cache is still empty.
    """
    # NOTE(review): super(EventStudy, self) resolves PAST EventStudy in the
    # MRO, so EventStudy.__init__ itself never runs - confirm intentional
    super(EventStudy, self).__init__()

    # helper objects: config lookup, logging, date filtering, disk/db I/O
    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)
    self.filter = Filter()
    self.io_engine = IOEngine()

    # only hit the disk when the class-level cache is still unpopulated
    if EventsFactory._econ_data_frame is None:
        self.load_economic_events()
def __init__(self, df=None):
    """Initialise helper objects and attach the economic event data.

    Parameters
    ----------
    df : DataFrame, optional
        Pre-loaded economic event data; when omitted the data is loaded
        via ``load_economic_events``.
    """
    # NOTE(review): super(EventStudy, self) resolves PAST EventStudy in the
    # MRO, so EventStudy.__init__ itself never runs - confirm intentional
    super(EventStudy, self).__init__()

    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)
    self.filter = Filter()
    self.io_engine = IOEngine()

    # caller-supplied frame wins; otherwise fall back to the standard load
    if df is None:
        self.load_economic_events()
    else:
        self._econ_data_frame = df
def __init__(self, df=None):
    """Initialise helper objects (including the speed cache) and attach the
    economic event data.

    Parameters
    ----------
    df : DataFrame, optional
        Pre-loaded economic event data; when omitted the data is loaded
        via ``load_economic_events``.
    """
    # NOTE(review): super(EventStudy, self) resolves PAST EventStudy in the
    # MRO, so EventStudy.__init__ itself never runs - confirm intentional
    super(EventStudy, self).__init__()

    self.config = ConfigManager()
    self.filter = Filter()
    self.io_engine = IOEngine()
    self.speed_cache = SpeedCache()

    # caller-supplied frame wins; otherwise fall back to the standard load
    if df is None:
        self.load_economic_events()
    else:
        self._econ_data_frame = df
def __init__(self, df=None):
    """Initialise helper objects (logger + speed cache) and attach the
    economic event data.

    Parameters
    ----------
    df : DataFrame, optional
        Pre-loaded economic event data; when omitted the data is loaded
        via ``load_economic_events``.
    """
    # NOTE(review): super(EventStudy, self) resolves PAST EventStudy in the
    # MRO, so EventStudy.__init__ itself never runs - confirm intentional
    super(EventStudy, self).__init__()

    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)
    self.filter = Filter()
    self.io_engine = IOEngine()
    self.speed_cache = SpeedCache()

    # caller-supplied frame wins; otherwise fall back to the standard load
    if df is None:
        self.load_economic_events()
    else:
        self._econ_data_frame = df
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform
    basic event studies for market data around these events.

    Note, requires a file of input of the following (transposed as columns!)
    - we give an example for NFP released on 7 Feb 2003 (note, that
    release-date-time-full, need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date                    31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close                   xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release          143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median           xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high             xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low              xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1           xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations     xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision          185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date     20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt              20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full  08/01/1999 13:30
    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    # manual offset for certain events where Bloomberg/data vendor displays
    # the wrong date (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt' : 1}

    def __init__(self, df=None):
        """Initialise helper objects and attach the economic event data.

        Parameters
        ----------
        df : DataFrame, optional
            Pre-loaded economic event data; when omitted the data is loaded
            from the speed cache / database.
        """
        # NOTE(review): super(EventStudy, self) resolves PAST EventStudy in
        # the MRO, so EventStudy.__init__ itself never runs - confirm intended
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        """Load the economic event DataFrame: first from the in-memory speed
        cache, failing that from the configured database engine (and then
        populate the speed cache for next time)."""
        self._econ_data_frame = self.speed_cache.get_dataframe(self._db_database_econ_file)

        if self._econ_data_frame is None:
            # self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(self._hdf5_file_econ_file)
            self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(
                self._db_database_econ_file, engine=marketconstants.write_engine,
                db_server=marketconstants.db_server,
                db_port=marketconstants.db_port,
                username=marketconstants.db_username,
                password=marketconstants.db_password)

            self.speed_cache.put_dataframe(self._db_database_econ_file, self._econ_data_frame)

    def harvest_category(self, category_name):
        """Fetch market data for the tickers of a category.

        NOTE(review): only the data frame fetched for the LAST element of the
        category is returned; earlier fetches are discarded (see TODO below).
        """
        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        """Return the full economic event DataFrame."""
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        """Write the full economic event DataFrame to a CSV file at *path*."""
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        """Return the release date/times for an event, indexed by those same
        date/times.

        Parameters
        ----------
        name : str
            Event name (eg. currency + description prefix of the column).
        event : str, optional
            Event qualifier appended to the name.
        csv : str, optional
            If supplied, read the date/times from this CSV file (dd/mm/YYYY HH:MM)
            instead of the in-memory economic event DataFrame.
        """
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates=True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        # start at a really early date
        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")

        # FIX: the filtered frame was previously discarded (the filter returns
        # a new object rather than mutating in place), so the date cut-off had
        # no effect
        data_frame = self.filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event=None, csv=None):
        """Like get_economic_event_date_time, but wrapped in a DataFrame whose
        columns name records the underlying release-date-time-full ticker."""
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event=None):
        """Return a DataFrame of the requested event *fields*, indexed by
        release date (converted from yyyymmdd), joined against the full
        release date/times.

        Acceptable fields:
            observation-date  <- observation time for the index
            actual-release, survey-median, survey-average, survey-high,
            survey-low, number-observations, release-dt,
            release-date-time-full, first-revision, first-revision-date
        """
        ticker = []

        # construct tickers of the form
        # USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # create dates (midnight-truncated) for join later
        date_time_dt = [datetime.datetime(
            date_time_fore[x].year,
            date_time_fore[x].month,
            date_time_fore[x].day) for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        self._econ_data_frame[name + ".observation-date"] = self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]
        data_frame.index = self._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]              # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]   # eliminate any NaN dates (artifact of Excel)
        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
            int((ind_dt[x] - (ind_dt[x] % 10000)) / 10000),
            int(((ind_dt[x] % 10000) - (ind_dt[x] % 100)) / 100),
            int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        """Construct a column name of the form "name.field", or
        "name-event.field" when an event qualifier is given.

        (The "desciptor" typo in the method name is kept deliberately for
        backward compatibility with existing callers.)
        """
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        """Return a DataFrame listing every known event name alongside each of
        its release date/times."""
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        # FIX: DataFrame.append returns a NEW frame (and is deprecated), so
        # the original loop discarded every row and always returned an empty
        # frame; collect the rows and build the DataFrame once instead
        rows = []

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                rows.append({'event-name': event, 'release-date-time-full': time})

        return pandas.DataFrame(rows, columns=columns)

    def get_all_economic_events(self):
        """Return the sorted, de-duplicated list of all event names present in
        the economic event DataFrame (derived from the ".Date" columns)."""
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # FIX: the comment promised alphabetical ordering but the set-to-list
        # conversion returned an arbitrary order; sort for determinism
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        """Return the release-dt column for an event."""
        # FIX: previously passed ".release-dt" which, combined with the dot
        # added by create_event_desciptor_field, produced "name..release-dt"
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end, lagged=False,
                                                     NYC_cutoff=10):
        """Compute returns over the event day, delegating to the base class
        implementation with this event's release date/times."""
        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self).get_economic_event_ret_over_custom_event_day(
            data_frame_in, event_dates, name, event, start, end, lagged=lagged, NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised=False):
        """Vol over the event day; thin wrapper which maps realised -> lagged."""
        return self.get_economic_event_ret_over_custom_event_day(vol_in, name, event, start, end, lagged=realised)

        # return super(EventsFactory, self).get_economic_event_ret_over_event_day(vol_in, name, event, start, end, lagged = realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol,
                                      mins=3 * 60, min_offset=0, create_index=False, resample=False, freq='minutes'):
        """Intraday moves around each release of an event, delegating to
        get_intraday_moves_over_custom_event after date filtering."""
        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(data_frame_rets, ef_time_frame, vol,
                                                         mins=mins, min_offset=min_offset,
                                                         create_index=create_index, resample=resample,
                                                         freq=freq)  # , start, end)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name,
                                                       start, end, offset_list=[1, 5, 30, 60], add_surprise=False,
                                                       surprise_field='survey-average'):
        """Surprise (actual vs. survey) against intraday moves, delegating to
        get_surprise_against_intraday_moves_over_custom_event."""
        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
            offset_list=offset_list, add_surprise=add_surprise, surprise_field=surprise_field)
market = Market(market_data_generator=MarketDataGenerator()) # In the config file, we can use keywords 'open', 'high', 'low', 'close' and 'volume' for Yahoo and Google finance data # Download equities data from Yahoo md_request = MarketDataRequest( start_date="decade", # start date data_source='yahoo', # use Bloomberg as data source tickers=['Apple', 'Citigroup'], # ticker (findatapy) fields=['close'], # which fields to download vendor_tickers=['aapl', 'c'], # ticker (Yahoo) vendor_fields=['Close']) # which Bloomberg fields to download) df = market.fetch_market(md_request) io = IOEngine() # Note: you need to set up Man-AHL's Arctic and MongoDB database for this to work # write to Arctic (to MongoDB) - by default use's Arctic's VersionStore io.write_time_series_cache_to_disk('stocks', df, engine='arctic', db_server='127.0.0.1') # Read back from Arctic df_arctic = io.read_time_series_cache_from_disk('stocks', engine='arctic', db_server='127.0.0.1') print(df_arctic.tail(n=5))
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform
    basic event studies for market data around these events.

    Note, requires a file of input of the following (transposed as columns!)
    - we give an example for NFP released on 7 Feb 2003 (note, that
    release-date-time-full, need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date                    31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close                   xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release          143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median           xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high             xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low              xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1           xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations     xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision          185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date     20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt              20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full  08/01/1999 13:30
    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    # manual offset for certain events where Bloomberg/data vendor displays
    # the wrong date (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt' : 1}

    def __init__(self, df=None):
        """Initialise helper objects and attach the economic event data.

        Parameters
        ----------
        df : DataFrame, optional
            Pre-loaded economic event data; when omitted the data is loaded
            from the speed cache / database.
        """
        # NOTE(review): super(EventStudy, self) resolves PAST EventStudy in
        # the MRO, so EventStudy.__init__ itself never runs - confirm intended
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        """Load the economic event DataFrame: first from the in-memory speed
        cache, failing that from the configured database engine (and then
        populate the speed cache for next time)."""
        self._econ_data_frame = self.speed_cache.get_dataframe(self._db_database_econ_file)

        if self._econ_data_frame is None:
            self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(
                self._db_database_econ_file, engine=marketconstants.write_engine,
                db_server=marketconstants.db_server,
                db_port=marketconstants.db_port,
                username=marketconstants.db_username,
                password=marketconstants.db_password)

            self.speed_cache.put_dataframe(self._db_database_econ_file, self._econ_data_frame)

    def harvest_category(self, category_name):
        """Fetch market data for the tickers of a category.

        NOTE(review): only the data frame fetched for the LAST element of the
        category is returned; earlier fetches are discarded (see TODO below).
        """
        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        """Return the full economic event DataFrame."""
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        """Write the full economic event DataFrame to a CSV file at *path*."""
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        """Return the release date/times for an event, indexed by those same
        date/times.

        Parameters
        ----------
        name : str
            Event name (eg. currency + description prefix of the column).
        event : str, optional
            Event qualifier appended to the name.
        csv : str, optional
            If supplied, read the date/times from this CSV file (dd/mm/YYYY HH:MM)
            instead of the in-memory economic event DataFrame.
        """
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates=True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        # start at a really early date
        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")

        # FIX: the filtered frame was previously discarded (the filter returns
        # a new object rather than mutating in place), so the date cut-off had
        # no effect
        data_frame = self.filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event=None, csv=None):
        """Like get_economic_event_date_time, but wrapped in a DataFrame whose
        columns name records the underlying release-date-time-full ticker."""
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event=None):
        """Return a DataFrame of the requested event *fields*, indexed by
        release date (converted from yyyymmdd), joined against the full
        release date/times.

        Acceptable fields:
            observation-date  <- observation time for the index
            actual-release, survey-median, survey-average, survey-high,
            survey-low, number-observations, release-dt,
            release-date-time-full, first-revision, first-revision-date
        """
        ticker = []

        # construct tickers of the form
        # USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # create dates (midnight-truncated) for join later
        date_time_dt = [datetime.datetime(
            date_time_fore[x].year,
            date_time_fore[x].month,
            date_time_fore[x].day) for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        self._econ_data_frame[name + ".observation-date"] = self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]
        data_frame.index = self._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]              # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]   # eliminate any NaN dates (artifact of Excel)
        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
            int((ind_dt[x] - (ind_dt[x] % 10000)) / 10000),
            int(((ind_dt[x] % 10000) - (ind_dt[x] % 100)) / 100),
            int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        """Construct a column name of the form "name.field", or
        "name-event.field" when an event qualifier is given.

        (The "desciptor" typo in the method name is kept deliberately for
        backward compatibility with existing callers.)
        """
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        """Return a DataFrame listing every known event name alongside each of
        its release date/times."""
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        # FIX: DataFrame.append returns a NEW frame (and is deprecated), so
        # the original loop discarded every row and always returned an empty
        # frame; collect the rows and build the DataFrame once instead
        rows = []

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                rows.append({'event-name': event, 'release-date-time-full': time})

        return pandas.DataFrame(rows, columns=columns)

    def get_all_economic_events(self):
        """Return the sorted, de-duplicated list of all event names present in
        the economic event DataFrame (derived from the ".Date" columns)."""
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # FIX: the comment promised alphabetical ordering but the set-to-list
        # conversion returned an arbitrary order; sort for determinism
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        """Return the release-dt column for an event."""
        # FIX: previously passed ".release-dt" which, combined with the dot
        # added by create_event_desciptor_field, produced "name..release-dt"
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end, lagged=False,
                                                     NYC_cutoff=10):
        """Compute returns over the event day, delegating to the base class
        implementation with this event's release date/times."""
        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self).get_economic_event_ret_over_custom_event_day(
            data_frame_in, event_dates, name, event, start, end, lagged=lagged, NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised=False):
        """Vol over the event day; thin wrapper which maps realised -> lagged."""
        return self.get_economic_event_ret_over_custom_event_day(vol_in, name, event, start, end, lagged=realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol,
                                      mins=3 * 60, min_offset=0, create_index=False, resample=False, freq='minutes'):
        """Intraday moves around each release of an event, delegating to
        get_intraday_moves_over_custom_event after date filtering."""
        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(data_frame_rets, ef_time_frame, vol,
                                                         mins=mins, min_offset=min_offset,
                                                         create_index=create_index, resample=resample,
                                                         freq=freq)  # , start, end)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name,
                                                       start, end, offset_list=[1, 5, 30, 60], add_surprise=False,
                                                       surprise_field='survey-average'):
        """Surprise (actual vs. survey) against intraday moves, delegating to
        get_surprise_against_intraday_moves_over_custom_event."""
        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
            offset_list=offset_list, add_surprise=add_surprise, surprise_field=surprise_field)