def tickers(self, tickers):
    """Sets the tickers for this request, expanding any wildcard patterns.

    A single ticker may be passed as a plain string; it is normalised to a
    one-element list.  A ticker containing ``*`` is treated as a wildcard
    pattern: the ``*`` characters are stripped, the remainder wrapped in a
    regex group (anchored with ``^`` when only a trailing ``*`` is present,
    i.e. a "starts with" match), and the pattern is expanded against the
    configured ticker universe for the current category/data source/
    freq/cut.

    Parameters
    ----------
    tickers : str or list(str) or None
        Tickers (or wildcard patterns) to set; ``None`` clears the field.
    """
    # Fix: the original duplicated the `tickers is not None` check in a
    # redundant nested `if`; a guard clause makes the flow explicit.
    if tickers is None:
        self.__tickers = tickers
        return

    if not isinstance(tickers, list):
        tickers = [tickers]

    config = None
    new_tickers = []

    for tick in tickers:
        if '*' in tick:
            start = ''

            # Trailing-only wildcard means "starts with": anchor the
            # regex at the beginning of the ticker
            if tick[-1] == "*" and tick[0] != "*":
                start = "^"

            tick = start + "(" + tick.replace('*', '') + ")"

            # Lazily fetch the singleton ConfigManager only when a
            # wildcard actually needs expanding
            if config is None:
                from findatapy.util import ConfigManager
                config = ConfigManager().get_instance()

            new_tickers.append(
                config.get_filtered_tickers_list_for_category(
                    self.__category, self.__data_source, self.__freq,
                    self.__cut, tick))
        else:
            new_tickers.append(tick)

    # Wildcard expansion yields nested lists, so flatten before storing
    self.__tickers = self._flatten_list(new_tickers)
def __init__(self):
    """Initialises shared helper components used throughout the loader."""
    # Configuration and logging
    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)

    # Helpers for filtering, calculation and disk/DB persistence
    self.filter = Filter()
    self.calculations = Calculations()
    self.io_engine = IOEngine()

    # -1 marks "no intraday code assigned yet"
    self._intraday_code = -1
def __init__(self):
    """Sets up helpers and lazily loads the shared economic events table."""
    super(EventStudy, self).__init__()

    # Configuration, logging and data-access helpers
    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)
    self.filter = Filter()
    self.io_engine = IOEngine()

    # The events table is class-level, shared across instances; only
    # load it from disk the first time an instance is created
    if EventsFactory._econ_data_frame is None:
        self.load_economic_events()
def __init__(self, df=None):
    """Sets up helpers; uses *df* as the events table if supplied.

    Parameters
    ----------
    df : DataFrame, optional
        Pre-loaded economic events table; when omitted the table is
        loaded from the cache/disk instead.
    """
    super(EventStudy, self).__init__()

    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)
    self.filter = Filter()
    self.io_engine = IOEngine()

    # Prefer a caller-supplied table; otherwise load from storage
    if df is None:
        self.load_economic_events()
    else:
        self._econ_data_frame = df
def __init__(self, df=None):
    """Sets up helpers (incl. speed cache); uses *df* as events table if given.

    Parameters
    ----------
    df : DataFrame, optional
        Pre-loaded economic events table; when omitted the table is
        loaded via the cache/disk path.
    """
    super(EventStudy, self).__init__()

    self.config = ConfigManager()
    self.filter = Filter()
    self.io_engine = IOEngine()

    # In-memory cache used to avoid repeated disk/DB reads
    self.speed_cache = SpeedCache()

    # Prefer a caller-supplied table; otherwise load from storage
    if df is None:
        self.load_economic_events()
    else:
        self._econ_data_frame = df
def refine_expiry_date(self, market_data_request):
    """Fills in the expiry date on a request when it is missing.

    Looks up the ticker's configured expiry via the ConfigManager and
    stores it on the request, but only when the caller has not already
    supplied one.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        Request whose ``expiry_date`` may be populated in place.

    Returns
    -------
    MarketDataRequest
        The same request object, with ``expiry_date`` populated when a
        configured expiry exists for the ticker.
    """
    # expiry date
    if market_data_request.expiry_date is None:
        # Fix: the original discarded the looked-up expiry; the guard
        # clearly intends to populate the missing field on the request
        market_data_request.expiry_date = \
            ConfigManager().get_instance().get_expiry_for_ticker(
                market_data_request.data_source,
                market_data_request.ticker)

    return market_data_request
def __init__(self):
    """Initialises the vendor loader and quietens noisy HTTP logging."""
    super(DataVendor, self).__init__()

    self.logger = LoggerManager().getLogger(__name__)

    # The 'requests' library logs at INFO level by default, which is
    # too chatty for bulk downloads — raise its threshold to WARNING
    import logging
    logging.getLogger("requests").setLevel(logging.WARNING)

    self.config = ConfigManager()
def __init__(self):
    """Initialises shared helpers and download-control defaults."""
    # Singleton configuration instance
    self.config = ConfigManager().get_instance()

    # Filtering / calculation / persistence helpers
    self.filter = Filter()
    self.calculations = Calculations()
    self.io_engine = IOEngine()

    # -1 disables the intraday code and the expired-contract cutoff
    self._intraday_code = -1
    self.days_expired_intraday_contract_download = -1
def __init__(self, data_vendor_dict=None):
    """Initialises shared helpers and an optional pre-built vendor map.

    Parameters
    ----------
    data_vendor_dict : dict, optional
        Mapping of data-source keys to pre-constructed data vendor
        objects; defaults to an empty dict.
    """
    # Fix: the original used a mutable default argument ({}), which is
    # shared across every instance constructed without the argument.
    # Using None and building a fresh dict per call is backward
    # compatible and avoids cross-instance state leakage.
    if data_vendor_dict is None:
        data_vendor_dict = {}

    self._config = ConfigManager().get_instance()
    self._filter = Filter()
    self._calculations = Calculations()
    self._io_engine = IOEngine()

    # -1 disables the intraday code and the expired-contract cutoff
    self._intraday_code = -1
    self._days_expired_intraday_contract_download = -1

    self._data_vendor_dict = data_vendor_dict
def __init__(self, log_every_day=1):
    """Initialises shared helpers and the daily-download logging cadence.

    Parameters
    ----------
    log_every_day : int, optional
        How often (in days) to emit progress logging; defaults to 1.
    """
    # Singleton configuration and logging
    self.config = ConfigManager().get_instance()
    self.logger = LoggerManager().getLogger(__name__)

    # Filtering / calculation / persistence helpers
    self.filter = Filter()
    self.calculations = Calculations()
    self.io_engine = IOEngine()

    # -1 disables the intraday code and the expired-contract cutoff
    self._intraday_code = -1
    self.days_expired_intraday_contract_download = -1

    self.log_every_day = log_every_day
def __init__(self, df=None):
    """Sets up helpers and cache; uses *df* as the events table if given.

    Parameters
    ----------
    df : DataFrame, optional
        Pre-loaded economic events table; when omitted the table is
        loaded via the cache/disk path.
    """
    super(EventStudy, self).__init__()

    self.config = ConfigManager()
    self.logger = LoggerManager().getLogger(__name__)
    self.filter = Filter()
    self.io_engine = IOEngine()

    # In-memory cache used to avoid repeated disk/DB reads
    self.speed_cache = SpeedCache()

    # Prefer a caller-supplied table; otherwise load from storage
    if df is None:
        self.load_economic_events()
    else:
        self._econ_data_frame = df
class DataVendor(object):
    """Abstract class for various data source loaders.

    Subclasses implement `load_ticker` / `kill_session`; this base class
    provides the translation between findatapy tickers/fields and the
    vendor's own tickers/fields, driven by the ConfigManager mappings.
    """

    def __init__(self):
        self.config = ConfigManager().get_instance()
        # self.config = None
        return

    @abc.abstractmethod
    def load_ticker(self, market_data_request):
        """Retrieves market data from external data source

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        return

    # to be implemented by subclasses
    @abc.abstractmethod
    def kill_session(self):
        return

    def construct_vendor_market_data_request(self, market_data_request):
        """Creates a MarketDataRequest with the vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        MarketDataRequest
        """
        symbols_vendor = self.translate_to_vendor_ticker(market_data_request)
        fields_vendor = self.translate_to_vendor_field(market_data_request)

        # Copy the request and overwrite tickers/fields with vendor forms
        market_data_request_vendor = MarketDataRequest(
            md_request=market_data_request)
        market_data_request_vendor.tickers = symbols_vendor
        market_data_request_vendor.fields = fields_vendor

        return market_data_request_vendor

    def translate_to_vendor_field(self, market_data_request):
        """Converts all the fields from findatapy fields to vendor fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # Explicit vendor fields on the request short-circuit translation
        if market_data_request.vendor_fields is not None:
            return market_data_request.vendor_fields

        source = market_data_request.data_source
        fields_list = market_data_request.fields

        if isinstance(fields_list, str):
            fields_list = [fields_list]

        # Without a configuration the fields pass through untranslated
        if self.config is None:
            return fields_list

        fields_converted = []

        for field in fields_list:
            try:
                f = self.config.convert_library_to_vendor_field(
                    source, field)
            # Fix: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.warn(
                    "Couldn't find field conversion, did you type it correctly: " + field)

                # NOTE: preserves original behaviour of returning None on
                # any failed conversion (aborts the whole translation)
                return

            fields_converted.append(f)

        return fields_converted

    # Translate findatapy ticker to vendor ticker
    def translate_to_vendor_ticker(self, market_data_request):
        """Converts all the tickers from findatapy tickers to vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # Explicit vendor tickers on the request short-circuit translation
        if market_data_request.vendor_tickers is not None:
            return market_data_request.vendor_tickers

        category = market_data_request.category
        source = market_data_request.data_source
        freq = market_data_request.freq
        cut = market_data_request.cut
        tickers_list = market_data_request.tickers

        if isinstance(tickers_list, str):
            tickers_list = [tickers_list]

        # Without a configuration the tickers pass through untranslated
        if self.config is None:
            return tickers_list

        tickers_list_converted = []

        for ticker in tickers_list:
            try:
                t = self.config.convert_library_to_vendor_ticker(
                    category, source, freq, cut, ticker)
            # Fix: was a bare `except:`
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find ticker conversion, did you type it correctly: " + ticker)

                # NOTE: preserves original behaviour of returning None on
                # any failed conversion (aborts the whole translation)
                return

            tickers_list_converted.append(t)

        return tickers_list_converted

    def translate_from_vendor_field(self, vendor_fields_list,
                                    market_data_request):
        """Converts all the fields from vendors fields to findatapy fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        data_source = market_data_request.data_source

        if isinstance(vendor_fields_list, str):
            vendor_fields_list = [vendor_fields_list]

        # if self.config is None: return vendor_fields_list

        fields_converted = []

        # If we haven't set the configuration files for automatic
        # configuration, map directly from the request's own
        # vendor_fields -> fields pairing (case-insensitive keys)
        if market_data_request.vendor_fields is not None:
            dictionary = dict(
                zip(
                    self.get_lower_case_list(
                        market_data_request.vendor_fields),
                    market_data_request.fields))

            for vendor_field in vendor_fields_list:
                try:
                    fields_converted.append(dictionary[vendor_field.lower()])
                # Fix: was a bare `except:`; only a missing key should
                # trigger the pass-through fallback
                except KeyError:
                    fields_converted.append(vendor_field)

        # Otherwise used stored configuration files (every field needs to
        # be defined!)
        else:
            for vendor_field in vendor_fields_list:
                try:
                    v = self.config.convert_vendor_to_library_field(
                        data_source, vendor_field)
                # Fix: was a bare `except:`
                except Exception:
                    logger = LoggerManager().getLogger(__name__)
                    logger.error(
                        "Couldn't find field conversion, did you type it correctly: " + vendor_field + ", using 'close' as default.")

                    v = 'close'

                fields_converted.append(v)

        return fields_converted

    # Translate findatapy ticker to vendor ticker
    def translate_from_vendor_ticker(self, vendor_tickers_list, md_request):
        """Converts all the fields from vendor tickers to findatapy tickers

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # Request-supplied vendor_tickers -> tickers pairing takes
        # precedence (case-insensitive keys)
        if md_request.vendor_tickers is not None:
            dictionary = dict(
                zip(self.get_lower_case_list(md_request.vendor_tickers),
                    md_request.tickers))

            tickers_stuff = []

            for vendor_ticker in vendor_tickers_list:
                tickers_stuff.append(dictionary[vendor_ticker.lower()])

            return tickers_stuff
            # [item for sublist in tickers_stuff for item in sublist]

        # tickers_list = md_request.tickers

        if isinstance(vendor_tickers_list, str):
            vendor_tickers_list = [vendor_tickers_list]

        # Without a configuration the tickers pass through untranslated
        if self.config is None:
            return vendor_tickers_list

        tickers_converted = []

        for vendor_ticker in vendor_tickers_list:
            try:
                v = self.config.convert_vendor_to_library_ticker(
                    md_request.category, md_request.data_source,
                    md_request.freq, md_request.cut, vendor_ticker)
            # Fix: was a bare `except:`
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find ticker conversion, did you type it correctly: " + vendor_ticker)

                # NOTE: preserves original behaviour of returning None on
                # any failed conversion (aborts the whole translation)
                return

            tickers_converted.append(v)

        return tickers_converted

    def get_lower_case_list(self, lst):
        """Returns a copy of *lst* with every string lower-cased."""
        return [k.lower() for k in lst]
def fetch_single_time_series(self, market_data_request):
    """Downloads a single time series, dropping tickers that have expired.

    Copies the request, removes any tickers whose configured expiry falls
    before the requested start date (and, for intraday futures contracts,
    tickers expired longer ago than the configured cutoff), then delegates
    the actual download to the appropriate data vendor.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        Request describing tickers, dates, frequency etc.

    Returns
    -------
    pandas.DataFrame or None
        Downloaded data (cast to float32 where possible), or None when
        every ticker was filtered out or the vendor returned nothing.
    """
    import datetime
    from datetime import timedelta

    # Work on a copy so the caller's request is not mutated
    market_data_request = MarketDataRequest(md_request=market_data_request)

    # only includes those tickers have not expired yet!
    start_date = pandas.Timestamp(market_data_request.start_date).date()

    current_date = datetime.datetime.utcnow().date()

    tickers = market_data_request.tickers
    vendor_tickers = market_data_request.vendor_tickers

    expiry_date = market_data_request.expiry_date

    config = ConfigManager().get_instance()

    # in many cases no expiry is defined so skip them
    for i in range(0, len(tickers)):
        try:
            expiry_date = config.get_expiry_for_ticker(
                market_data_request.data_source, tickers[i])
        # Fix: was a bare `except:`; on failure expiry_date keeps its
        # previous value (NOTE(review): carried over from the prior
        # iteration — preserved from the original, confirm intended)
        except Exception:
            pass

        if expiry_date is not None:
            # use pandas Timestamp, a bit more robust with weird dates
            # (can fail if comparing date vs datetime)
            expiry_date = pandas.Timestamp(expiry_date).date()

            # if the expiry is before the start date of our download
            # don't bother downloading this ticker
            if expiry_date < start_date:
                tickers[i] = None

            # special case for futures-contracts which are intraday
            # avoid downloading if the expiry date is very far in the past
            # (we need this before there might be odd situations where we
            # run on an expiry date, but still want to get data right till
            # expiry time)
            if market_data_request.category == 'futures-contracts' \
                    and market_data_request.freq == 'intraday' \
                    and self.days_expired_intraday_contract_download > 0:

                if expiry_date + timedelta(
                        days=self.days_expired_intraday_contract_download
                        ) < current_date:
                    tickers[i] = None

        # Keep the vendor ticker list aligned with the filtered tickers
        if vendor_tickers is not None and tickers[i] is None:
            vendor_tickers[i] = None

    # Fix: identity-style comparison `e != None` replaced by `is not None`
    market_data_request.tickers = [e for e in tickers if e is not None]

    if vendor_tickers is not None:
        market_data_request.vendor_tickers = [
            e for e in vendor_tickers if e is not None
        ]

    data_frame_single = None

    if len(market_data_request.tickers) > 0:
        data_frame_single = self.get_data_vendor(
            market_data_request.data_source).load_ticker(
            market_data_request)
        # print(data_frame_single.head(n=10))

    if data_frame_single is not None:
        if not data_frame_single.empty:
            data_frame_single.index.name = 'Date'

            # will fail for dataframes which includes dates/strings
            # (eg. futures contract names)
            try:
                data_frame_single = data_frame_single.astype('float32')
            except Exception:
                self.logger.warning('Could not convert to float')

            if market_data_request.freq == "second":
                # NOTE(review): in recent pandas .resample("1s") returns a
                # Resampler object without an aggregation step — confirm
                # an aggregation (eg. .last()) isn't needed here
                data_frame_single = data_frame_single.resample("1s")

    return data_frame_single
def fetch_market_data(self, market_data_request, kill_session=True):
    """Loads time series from specified data provider

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    # data_vendor = self.get_data_vendor(market_data_request.data_source)

    # check if tickers have been specified (if not load all of them for a category)
    # also handle single tickers/list tickers
    create_tickers = False

    # When only vendor tickers were given, use them as the findatapy tickers too
    if market_data_request.vendor_tickers is not None and market_data_request.tickers is None:
        market_data_request.tickers = market_data_request.vendor_tickers

    tickers = market_data_request.tickers

    # None, empty string or empty list all mean "load the whole category"
    if tickers is None:
        create_tickers = True
    elif isinstance(tickers, str):
        if tickers == '': create_tickers = True
    elif isinstance(tickers, list):
        if tickers == []: create_tickers = True

    if create_tickers:
        market_data_request.tickers = ConfigManager().get_instance(
        ).get_tickers_list_for_category(market_data_request.category,
                                        market_data_request.data_source,
                                        market_data_request.freq,
                                        market_data_request.cut)

    # intraday or tick: only one ticker per cache file
    if (market_data_request.freq in
            ['intraday', 'tick', 'second', 'hour', 'minute']):
        data_frame_agg = self.download_intraday_tick(market_data_request)

        # return data_frame_agg

    # daily: multiple tickers per cache file - assume we make one API call to vendor library
    else:
        data_frame_agg = self.download_daily(market_data_request)

    if ('internet_load' in market_data_request.cache_algo):
        self.logger.debug("Internet loading.. ")

        # signal to data_vendor template to exit session
        # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

    # 'cache_algo' alone means: populate the in-memory cache only
    if (market_data_request.cache_algo == 'cache_algo'):
        self.logger.debug(
            "Only caching data in memory, do not return any time series.")
        return

    # only return time series if specified in the algo
    if 'return' in market_data_request.cache_algo:
        # special case for events/events-dt which is not indexed like other tables (also same for downloading futures
        # contracts dates)
        if market_data_request.category is not None:
            if 'events' in market_data_request.category:
                return data_frame_agg

        # pad columns a second time (is this necessary to do here again?)
        # TODO only do this for not daily data?
        try:
            if data_frame_agg is not None:
                data_frame_agg = self.filter.filter_time_series(market_data_request, data_frame_agg, pad_columns=True)\
                    .dropna(how = 'all')

                # resample data using pandas if specified in the MarketDataRequest
                if market_data_request.resample is not None:
                    if 'last' in market_data_request.resample_how:
                        data_frame_agg = data_frame_agg.resample(
                            market_data_request.resample).last()
                    elif 'first' in market_data_request.resample_how:
                        data_frame_agg = data_frame_agg.resample(
                            market_data_request.resample).first()

                    if 'dropna' in market_data_request.resample_how:
                        data_frame_agg = data_frame_agg.dropna(how='all')
            else:
                self.logger.warn("No data returned for "
                                 + str(market_data_request.tickers))

            return data_frame_agg
        except Exception as e:
            # NOTE(review): prints to stdout rather than logging; the
            # `import traceback` below is unused — consider cleaning up
            print(str(e))

            # Best-effort: return whatever was downloaded even if the
            # post-processing above failed
            if data_frame_agg is not None:
                return data_frame_agg

            import traceback

            self.logger.warn("No data returned for "
                             + str(market_data_request.tickers))

            return None
class DataVendor(object):
    """Abstract base for data source loaders.

    Subclasses implement `load_ticker` / `kill_session`; this base class
    translates between findatapy tickers/fields and vendor tickers/fields
    using the ConfigManager mappings.
    """

    def __init__(self):
        self.config = ConfigManager()
        # self.config = None
        return

    @abc.abstractmethod
    def load_ticker(self, market_data_request):
        """ load_ticker - Retrieves market data from external data source

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        return

    # to be implemented by subclasses
    @abc.abstractmethod
    def kill_session(self):
        return

    def construct_vendor_market_data_request(self, market_data_request):
        """ construct_vendor_market_data_request - creates a MarketDataRequest
        with the vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        MarketDataRequest
        """
        symbols_vendor = self.translate_to_vendor_ticker(market_data_request)
        fields_vendor = self.translate_to_vendor_field(market_data_request)

        # Shallow-copy the request and overwrite with vendor forms
        market_data_request_vendor = copy.copy(market_data_request)
        market_data_request_vendor.tickers = symbols_vendor
        market_data_request_vendor.fields = fields_vendor

        return market_data_request_vendor

    def translate_to_vendor_field(self, market_data_request):
        """ translate_to_vendor_field - Converts all the fields from
        findatapy fields to vendor fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # Explicit vendor fields on the request short-circuit translation
        if market_data_request.vendor_fields is not None:
            return market_data_request.vendor_fields

        source = market_data_request.data_source
        fields_list = market_data_request.fields

        if isinstance(fields_list, str):
            fields_list = [fields_list]

        # Without a configuration the fields pass through untranslated
        if self.config is None:
            return fields_list

        fields_converted = []

        for field in fields_list:
            fields_converted.append(
                self.config.convert_library_to_vendor_field(source, field))

        return fields_converted

    # translate findatapy ticker to vendor ticker
    def translate_to_vendor_ticker(self, market_data_request):
        """ translate_to_vendor_tickers - Converts all the tickers from
        findatapy tickers to vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # Explicit vendor tickers on the request short-circuit translation
        if market_data_request.vendor_tickers is not None:
            return market_data_request.vendor_tickers

        category = market_data_request.category
        source = market_data_request.data_source
        freq = market_data_request.freq
        cut = market_data_request.cut
        tickers_list = market_data_request.tickers

        if isinstance(tickers_list, str):
            tickers_list = [tickers_list]

        # Without a configuration the tickers pass through untranslated
        if self.config is None:
            return tickers_list

        tickers_list_converted = []

        for ticker in tickers_list:
            tickers_list_converted.append(
                self.config.convert_library_to_vendor_ticker(
                    category, source, freq, cut, ticker))

        return tickers_list_converted

    def translate_from_vendor_field(self, vendor_fields_list,
                                    market_data_request):
        """ translate_from_vendor_field - Converts all the fields from
        vendors fields to findatapy fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        data_source = market_data_request.data_source

        if isinstance(vendor_fields_list, str):
            vendor_fields_list = [vendor_fields_list]

        # if self.config is None: return vendor_fields_list

        fields_converted = []

        # if we haven't set the configuration files for automatic
        # configuration, map directly from the request's own
        # vendor_fields -> fields pairing
        if market_data_request.vendor_fields is not None:
            dictionary = dict(
                zip(market_data_request.vendor_fields,
                    market_data_request.fields))

            for vendor_field in vendor_fields_list:
                try:
                    fields_converted.append(dictionary[vendor_field])
                # Fix: was a bare `except:`; only a missing key should
                # trigger the pass-through fallback
                except KeyError:
                    fields_converted.append(vendor_field)

        # otherwise used stored configuration files (every field needs to
        # be defined!)
        else:
            for vendor_field in vendor_fields_list:
                fields_converted.append(
                    self.config.convert_vendor_to_library_field(
                        data_source, vendor_field))

        return fields_converted

    # translate findatapy ticker to vendor ticker
    def translate_from_vendor_ticker(self, vendor_tickers_list,
                                     market_data_request):
        """ translate_from_vendor_ticker - Converts all the fields from
        vendor tickers to findatapy tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # Request-supplied vendor_tickers -> tickers pairing takes
        # precedence (case-sensitive exact match)
        if market_data_request.vendor_tickers is not None:
            dictionary = dict(
                zip(market_data_request.vendor_tickers,
                    market_data_request.tickers))

            tickers_stuff = []

            for vendor_ticker in vendor_tickers_list:
                tickers_stuff.append(dictionary[vendor_ticker])

            return tickers_stuff
            # [item for sublist in tickers_stuff for item in sublist]

        data_source = market_data_request.data_source

        # tickers_list = market_data_request.tickers

        if isinstance(vendor_tickers_list, str):
            vendor_tickers_list = [vendor_tickers_list]

        # Without a configuration the tickers pass through untranslated
        if self.config is None:
            return vendor_tickers_list

        tickers_converted = []

        for vendor_ticker in vendor_tickers_list:
            tickers_converted.append(
                self.config.convert_vendor_to_library_ticker(
                    data_source, vendor_ticker))

        return tickers_converted
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform
    basic event studies for market data around these events. Note, requires
    a file of input of the following (transposed as columns!) - we give an
    example for NFP released on 7 Feb 2003 (note, that
    release-date-time-full, need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date                   31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close                  xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release         143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average         xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high            xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low             xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations    xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision         185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date    20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt             20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full 08/01/1999 13:30
    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    ### manual offset for certain events where Bloomberg/data vendor displays the wrong date (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt' : 1}

    def __init__(self, df = None):
        """Sets up helpers; uses *df* as the events table when supplied,
        otherwise loads it via the speed cache / disk."""
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()
        return

    def load_economic_events(self):
        """Loads the economic events table, preferring the in-memory speed
        cache and falling back to the configured database/disk store."""
        self._econ_data_frame = self.speed_cache.get_dataframe(
            self._db_database_econ_file)

        if self._econ_data_frame is None:
            # self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(self._hdf5_file_econ_file)
            self._econ_data_frame = \
                self.io_engine.read_time_series_cache_from_disk(
                    self._db_database_econ_file,
                    engine=marketconstants.write_engine,
                    db_server=marketconstants.db_server,
                    db_port=marketconstants.db_port,
                    username=marketconstants.db_username,
                    password=marketconstants.db_password)

            # Populate the cache so subsequent loads skip the disk/DB read
            self.speed_cache.put_dataframe(self._db_database_econ_file,
                                           self._econ_data_frame)

    def harvest_category(self, category_name):
        """Fetches market data for every ticker set matching *category_name*.

        NOTE(review): only the data frame from the last iteration is
        returned — earlier results are overwritten (see TODO below).
        """
        cat = self.config.get_categories_from_tickers_selective_filter(
            category_name)

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(
                md_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        """Returns the raw economic events table."""
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        """Writes the economic events table to a CSV file at *path*."""
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event = None, csv = None):
        """Returns release date/times for an event, from the events table
        or from a CSV file when *csv* is given.

        NOTE(review): the filter_time_series_by_date result below is not
        assigned, so the 1971 start-date filter has no effect — confirm
        whether that is intended.
        """
        ticker = self.create_event_desciptor_field(
            name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(
                x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0,
                                         parse_dates = True,
                                         date_parser=dateparse)

        # Drop rows whose index date failed to parse
        data_frame = data_frame[pandas.notnull(data_frame.index)]

        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")
        self.filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event = None,
                                               csv = None):
        """Like get_economic_event_date_time, but wrapped in a DataFrame
        whose columns name records the event descriptor."""
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(
            name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name,
                                            event = None):
        """Returns the requested *fields* for an event, indexed by the
        release date/time (joined on release-dt)."""
        ### acceptible fields
        # observation-date <- observation time for the index
        # actual-release
        # survey-median
        # survey-average
        # survey-high
        # survey-low
        # survey-high
        # number-observations
        # release-dt
        # release-date-time-full
        # first-revision
        # first-revision-date

        ticker = []

        # construct tickers of the form USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(
                self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert
        # this later)
        ticker_index = self.create_event_desciptor_field(name, event,
                                                         "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)

        date_time_fore = event_date_time.index

        # create dates for join later
        date_time_dt = [datetime.datetime(
            date_time_fore[x].year,
            date_time_fore[x].month,
            date_time_fore[x].day) for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index,
                                                 date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        # Record the observation date alongside the requested fields
        self._econ_data_frame[name + ".observation-date"] = \
            self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]
        data_frame.index = self._econ_data_frame[ticker_index]

        # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[data_frame.index != 0]
        # eliminate any NaN dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]

        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
            int((ind_dt[x] - (ind_dt[x] % 10000))/10000),
            int(((ind_dt[x] % 10000) - (ind_dt[x] % 100))/100),
            int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(
                days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        """Builds the column name "name.field" or "name-event.field"."""
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        """Collects release date/times for every known event.

        NOTE(review): DataFrame.append returns a new frame and its result
        is not assigned here (and append was removed in pandas 2.x), so
        the returned frame stays empty — confirm intended behaviour.
        """
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        data_frame = pandas.DataFrame(data=numpy.zeros((0,len(columns))),
                                      columns=columns)

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                data_frame.append({'event-name':event,
                                   'release-date-time-full':time},
                                  ignore_index=True)

        return data_frame

    def get_all_economic_events(self):
        """Returns the de-duplicated list of event names found in the
        events table (derived from columns ending in '.Date')."""
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names
                       if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # sort list alphabetically (and remove any duplicates)
        return list(set(event_names_filtered))

    def get_economic_event_date(self, name, event = None):
        """Returns the raw release-dt column for an event.

        NOTE(review): the leading '.' in ".release-dt" yields a column
        name with a double dot (eg. "name..release-dt") — confirm the
        stored column naming matches.
        """
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, ".release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in,
                                                     name, event, start,
                                                     end, lagged = False,
                                                     NYC_cutoff = 10):
        """Delegates to the EventStudy base using this event's date/times."""
        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self)\
            .get_economic_event_ret_over_custom_event_day(
                data_frame_in, event_dates, name, event, start, end,
                lagged = lagged, NYC_cutoff = NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event,
                                              start, end, realised = False):
        """Event-day study on a volatility series; *realised* maps onto
        the lagged flag of the returns-based study."""
        return self.get_economic_event_ret_over_custom_event_day(
            vol_in, name, event, start, end, lagged = realised)

        # return super(EventsFactory, self).get_economic_event_ret_over_event_day(vol_in, name, event, start, end, lagged = realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross,
                                      event_fx, event_name, start, end,
                                      vol, mins = 3 * 60, min_offset = 0,
                                      create_index = False,
                                      resample = False, freq = 'minutes'):
        """Intraday event study around each release of the given event,
        restricted to the [start, end] window."""
        ef_time_frame = self.get_economic_event_date_time_dataframe(
            event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(
            start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(
            data_frame_rets, ef_time_frame, vol, mins = mins,
            min_offset = min_offset, create_index = create_index,
            resample = resample, freq = freq)#, start, end)

    def get_surprise_against_intraday_moves_over_event(
            self, data_frame_cross_orig, cross, event_fx, event_name,
            start, end, offset_list = [1, 5, 30, 60],
            add_surprise = False, surprise_field = 'survey-average'):
        """Compares the data surprise of each release against intraday
        market moves at the given minute offsets.

        NOTE(review): offset_list uses a mutable default argument — safe
        only while callers never mutate it.
        """
        fields = ['actual-release', 'survey-median', 'survey-average',
                  'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(
            fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(
            start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx,
            event_name, start, end, offset_list = offset_list,
            add_surprise = add_surprise, surprise_field = surprise_field)
def fetch_market_data(self, md_request):
    """Loads time series from specified data provider

    Parameters
    ----------
    md_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    logger = LoggerManager().getLogger(__name__)

    # data_vendor = self.get_data_vendor(md_request.data_source)

    # Check if tickers have been specified (if not load all of them for a
    # category)
    # also handle single tickers/list tickers
    create_tickers = False

    # When only vendor tickers were given, use them as findatapy tickers too
    if md_request.vendor_tickers is not None \
            and md_request.tickers is None:
        md_request.tickers = md_request.vendor_tickers

    tickers = md_request.tickers

    # None, empty string or empty list all mean "load the whole category"
    if tickers is None:
        create_tickers = True
    elif isinstance(tickers, str):
        if tickers == "": create_tickers = True
    elif isinstance(tickers, list):
        if tickers == []: create_tickers = True

    if create_tickers:
        md_request.tickers = ConfigManager().get_instance()\
            .get_tickers_list_for_category(
            md_request.category, md_request.data_source, md_request.freq,
            md_request.cut)

    # intraday or tick: only one ticker per cache file
    if md_request.freq in ["intraday", "tick", "second", "hour",
                           "minute"]:
        df_agg = self.download_intraday_tick(md_request)

    # Daily: multiple tickers per cache file - assume we make one API call
    # to vendor library
    else:
        df_agg = self.download_daily(md_request)

    if "internet_load" in md_request.cache_algo:
        logger.debug("Internet loading.. ")

    # 'cache_algo' alone means: populate the in-memory cache only
    if md_request.cache_algo == "cache_algo":
        logger.debug(
            "Only caching data in memory, do not return any time series.")
        return

    # Only return time series if specified in the algo
    if "return" in md_request.cache_algo:
        # Special case for events/events-dt which is not indexed like other
        # tables (also same for downloading futures contracts dates)
        if md_request.category is not None:
            if "events" in md_request.category:
                return df_agg

        # Pad columns a second time (is this necessary to do here again?)
        # TODO only do this for not daily data?
        try:
            if df_agg is not None:
                df_agg = self._filter.filter_time_series(
                    md_request, df_agg, pad_columns=True)
                df_agg = df_agg.dropna(how="all")

                # Resample data using pandas if specified in the
                # MarketDataRequest
                if md_request.resample is not None:
                    if "last" in md_request.resample_how:
                        df_agg = df_agg.resample(
                            md_request.resample).last()
                    elif "first" in md_request.resample_how:
                        df_agg = df_agg.resample(
                            md_request.resample).first()

                    if "dropna" in md_request.resample_how:
                        df_agg = df_agg.dropna(how="all")
            else:
                logger.warn("No data returned for " + str(
                    md_request.tickers))

            return df_agg
        except Exception as e:
            # Best-effort: return whatever was downloaded even if the
            # post-processing above failed
            if df_agg is not None:
                return df_agg

            # NOTE(review): `import traceback` below is unused
            import traceback

            logger.warn(
                "No data returned for " + str(md_request.tickers) +
                ", " + str(e))

            return None
def __init__(self): self.config = ConfigManager() # self.config = None return
class MarketDataGenerator(object):
    """Fetches market data time series directly from data vendors.

    Splits requests into per-ticker/per-group downloads (optionally in
    threads), and keeps a simple in-memory cache for daily data.
    """

    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1

        return

    def flush_cache(self):
        """Flushes the internal in-memory cache of time series."""
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_data_vendor(self, source):
        """Loads the appropriate data service class.

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo",
            "google", "fred" etc. we can also have forms like
            "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """
        data_vendor = None

        # Only the portion before any hyphen identifies the vendor
        source = source.split("-")[0]

        if source == 'bloomberg':
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()
        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()
        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()
        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()
        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()
        elif source in ['yahoo', 'google', 'fred', 'oecd', 'eurostat',
                        'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """Loads time series from specified data provider.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        tickers = market_data_request.tickers
        data_vendor = self.get_data_vendor(market_data_request.data_source)

        # Check if tickers have been specified (if not load all of them
        # for a category); also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            market_data_request.tickers = \
                self.config.get_tickers_list_for_category(
                    market_data_request.category,
                    market_data_request.data_source,
                    market_data_request.freq,
                    market_data_request.cut)

        # Intraday or tick: only one ticker per cache file
        if market_data_request.freq in ['intraday', 'tick', 'second',
                                        'hour', 'minute']:
            data_frame_agg = self.download_intraday_tick(
                market_data_request, data_vendor)

        # Daily: multiple tickers per cache file - assume we make one
        # API call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request,
                                                 data_vendor)

        if 'internet_load' in market_data_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # Signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if market_data_request.cache_algo == 'cache_algo':
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        # Only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # Special case for events/events-dt which is not indexed
            # like other tables
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            try:
                return self.filter.filter_time_series(
                    market_data_request, data_frame_agg,
                    pad_columns=True)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_market_data_cached(self, market_data_request):
        """Loads time series from the in-memory cache (if it exists).

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        if market_data_request.freq == "intraday":
            ticker = market_data_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(market_data_request,
                                                 ticker)

        if fname in self._time_series_cache:
            data_frame = self._time_series_cache[fname]

            return self.filter.filter_time_series(market_data_request,
                                                  data_frame)

        return None

    def create_time_series_hash_key(self, market_data_request,
                                    ticker=None):
        """Creates a hash key for retrieving the time series.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """
        if isinstance(ticker, list):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(market_data_request, ticker))

    def download_intraday_tick(self, market_data_request, data_vendor):
        """Loads intraday time series from specified data provider.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        data_frame_agg = None
        calcs = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # Single threaded version: handle intraday ticker calls
        # separately, one by one
        if len(market_data_request.tickers) == 1 or \
                DataConstants().market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(
                    market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # We downscale into float32, to avoid memory problems
                # in Python (32 bit); data is stored on disk as float32
                # anyway
                data_frame_single = data_vendor.load_ticker(
                    market_data_request_single)

                # If the vendor doesn't provide any data, don't attempt
                # to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype(
                            'float32')

                        data_frame_group.append(data_frame_single)

                        # if you call for returning multiple tickers,
                        # be careful with memory considerations!
                        # if data_frame_agg is not None:
                        #     data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        # else:
                        #     data_frame_agg = data_frame_single

                # key = self.create_category_key(market_data_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            # If you call for returning multiple tickers, be careful
            # with memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calcs.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg
        else:
            market_data_request_list = []

            # Create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(
                    market_data_request)
                market_data_request_single.tickers = ticker

                if hasattr(market_data_request, 'vendor_tickers'):
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(
                    market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):
        """Downloads a single ticker's time series from its vendor."""
        data_frame_single = self.get_data_vendor(
            market_data_request.data_source).load_ticker(
            market_data_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # Will fail for dataframes which include dates
                try:
                    data_frame_single = data_frame_single.astype(
                        'float32')
                except Exception:
                    pass

                if market_data_request.freq == "second":
                    # NOTE(review): resample("1s") without an
                    # aggregation returns a Resampler object in modern
                    # pandas - confirm the intended aggregation
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):
        """Downloads a group of requests, in a pool if configured."""
        data_frame_agg = None

        # Depends on the nature of operation as to whether we should
        # use the threading or multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # Most of the time is spent waiting for Bloomberg to
            # return, so can use threads rather than multiprocessing;
            # must use the multiprocessing_on_dill library otherwise
            # can't pickle objects correctly
            # Note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in \
                DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = Pool(thread_no)

            # Open the market data downloads in their own threads and
            # return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # Collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group
                                if i is not None]

            if data_frame_group is not None:
                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_group)

        return data_frame_agg

    def download_daily(self, market_data_request, data_vendor):
        """Loads daily time series from specified data provider.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # Daily data does not include ticker in the key, as multiple
        # tickers are stored in the same file
        if DataConstants().market_thread_no['other'] == 1:
            data_frame_agg = data_vendor.load_ticker(market_data_request)
        else:
            market_data_request_list = []

            # NOTE(review): subtracting 1 before truncation can give
            # uneven group sizes - confirm the intent of this formula
            group_size = int(
                len(market_data_request.tickers)
                / DataConstants().market_thread_no['other'] - 1)

            if group_size == 0:
                group_size = 1

            # Split up tickers into groups related to number of threads
            # to call
            for i in range(0, len(market_data_request.tickers),
                           group_size):
                market_data_request_single = copy.copy(
                    market_data_request)
                market_data_request_single.tickers = \
                    market_data_request.tickers[i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[
                            i:i + group_size]

                market_data_request_list.append(
                    market_data_request_single)

            data_frame_agg = self.fetch_group_time_series(
                market_data_request_list)

        key = self.create_category_key(market_data_request)
        fname = self.create_cache_file_name(key)

        # Cache in memory (ok for daily data)
        self._time_series_cache[fname] = data_frame_agg

        return data_frame_agg

    def create_category_key(self, market_data_request, ticker=None):
        """Returns a category key for the associated MarketDataRequest.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None:
            cut = market_data_request.cut

        if ticker is not None:
            key = environment + "." + category + '.' + source + '.' \
                  + freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' \
                  + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        """Prepends the configured time series folder to a filename."""
        return DataConstants().folder_time_series_data + "/" + filename
def __init__(self): self.config = ConfigManager().get_instance() self.logger = LoggerManager().getLogger(__name__) # self.config = None return
def fetch_single_time_series(self, md_request):
    """Downloads a time series for a request, first dropping any
    tickers whose futures contracts have already expired.

    Parameters
    ----------
    md_request : MarketDataRequest
        Describes the time series to be fetched

    Returns
    -------
    pandas.DataFrame or None
    """
    md_request = MarketDataRequest(md_request=md_request)

    # Only includes those tickers have not expired yet!
    # Keep start_date as a pandas Timestamp so the comparison with
    # expiry_date below never mixes date and datetime types
    start_date = pd.Timestamp(md_request.start_date)
    current_date = pd.Timestamp(datetime.datetime.utcnow().date())

    tickers = md_request.tickers
    vendor_tickers = md_request.vendor_tickers

    config = ConfigManager().get_instance()

    # In many cases no expiry is defined so skip them
    for i in range(0, len(tickers)):
        try:
            expiry_date = config.get_expiry_for_ticker(
                md_request.data_source, tickers[i])
        except Exception:
            # Fall back to the request-level expiry rather than
            # carrying over the previous ticker's expiry
            expiry_date = md_request.expiry_date

        if expiry_date is not None:
            expiry_date = pd.Timestamp(expiry_date)

            if not pd.isna(expiry_date):
                # Use pandas Timestamp, a bit more robust with weird
                # dates (can fail if comparing date vs datetime)

                # If the expiry is before the start date of our
                # download don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # Special case for futures-contracts which are intraday
                # avoid downloading if the expiry date is very far in
                # the past
                # (we need this before there might be odd situations
                # where we run on an expiry date, but still want to get
                # data right till expiry time)
                if md_request.category == "futures-contracts" \
                        and md_request.freq == "intraday" \
                        and self._days_expired_intraday_contract_download \
                        > 0:

                    if expiry_date + pd.Timedelta(
                            days=self
                            ._days_expired_intraday_contract_download) \
                            < current_date:
                        tickers[i] = None

                if vendor_tickers is not None and tickers[i] is None:
                    vendor_tickers[i] = None

    md_request.tickers = [e for e in tickers if e is not None]

    if vendor_tickers is not None:
        md_request.vendor_tickers = [e for e in vendor_tickers
                                     if e is not None]

    df_single = None

    if len(md_request.tickers) > 0:
        df_single = self.get_data_vendor(md_request).load_ticker(
            md_request)

    if df_single is not None:
        if not df_single.empty:
            df_single.index.name = "Date"

            # Will fail for DataFrames which includes dates/strings
            # eg. futures contract names
            df_single = Calculations().convert_to_numeric_dataframe(
                df_single)

            if md_request.freq == "second":
                # NOTE(review): resample("1s") without an aggregation
                # returns a Resampler object in modern pandas - confirm
                # the intended aggregation
                df_single = df_single.resample("1s")

    return df_single
def __init__(self): self.config = ConfigManager().get_instance() # self.config = None return
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to
    perform basic event studies for market data around these events.

    Note, requires a file of input of the following (transposed as
    columns!) - we give an example for NFP released on 7 Feb 2003
    (note, that release-date-time-full, need not be fully aligned by
    row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date                   31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close                  xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release         143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average         xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high            xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low             xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1          xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations    xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision         185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date    20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt             20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full 08/01/1999 13:30
    """

    # _econ_data_frame = None

    # Where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    ### Manual offset for certain events where Bloomberg/data vendor
    ### displays the wrong date (usually because of time differences)
    _offset_events = {
        'AUD-Australia Labor Force Employment Change SA.release-dt': 1}

    def __init__(self, df=None):
        # NOTE(review): super(EventStudy, self) skips EventStudy's own
        # __init__ - confirm this is intentional
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        # Try the speed cache first, then fall back to disk/database
        self._econ_data_frame = self.speed_cache.get_dataframe(
            self._db_database_econ_file)

        if self._econ_data_frame is None:
            self._econ_data_frame = \
                self.io_engine.read_time_series_cache_from_disk(
                    self._db_database_econ_file,
                    engine=marketconstants.write_engine,
                    db_server=marketconstants.db_server,
                    db_port=marketconstants.db_port,
                    username=marketconstants.db_username,
                    password=marketconstants.db_password)

            self.speed_cache.put_dataframe(self._db_database_econ_file,
                                           self._econ_data_frame)

    def harvest_category(self, category_name):
        """Downloads market data for every ticker in a category."""
        cat = self.config.get_categories_from_tickers_selective_filter(
            category_name)

        for k in cat:
            # NOTE(review): self.market_data_generator is never set in
            # __init__ - confirm callers assign it before use
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(
                md_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        """Returns the release date/times for an economic event."""
        ticker = self.create_event_desciptor_field(
            name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(
                x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0,
                                         parse_dates=True,
                                         date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        # Start at a really early date
        start_date = datetime.datetime.strptime("01-Jan-1971",
                                                "%d-%b-%Y")

        # NOTE(review): the filtered result is discarded here - confirm
        # whether it should be assigned back to data_frame
        self.filter.filter_time_series_by_date(start_date, None,
                                               data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event=None,
                                               csv=None):
        """As get_economic_event_date_time, but returns a DataFrame."""
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(
            name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name,
                                            event=None):
        """Returns the given fields for an event, indexed by the
        release date/time."""
        ### Acceptable fields
        # observation-date <- observation time for the index
        # actual-release
        # survey-median
        # survey-average
        # survey-high
        # survey-low
        # survey-high
        # number-observations
        # release-dt
        # release-date-time-full
        # first-revision
        # first-revision-date

        ticker = []

        # Construct tickers of the form
        # USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(
                name, event, fields[i]))

        # Index on the release-dt field eg. 20101230 (we shall convert
        # this later)
        ticker_index = self.create_event_desciptor_field(name, event,
                                                         "release-dt")

        ######## Grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # Create dates for join later
        date_time_dt = [datetime.datetime(
            date_time_fore[x].year,
            date_time_fore[x].month,
            date_time_fore[x].day) for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index,
                                                 date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## Grab event date/fields
        self._econ_data_frame[name + ".observation-date"] = \
            self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]

        data_frame.index = self._econ_data_frame[ticker_index]

        # Eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[data_frame.index != 0]

        # Eliminate any NaN dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]

        ind_dt = data_frame.index

        # Convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
            int((ind_dt[x] - (ind_dt[x] % 10000)) / 10000),
            int(((ind_dt[x] % 10000) - (ind_dt[x] % 100)) / 100),
            int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid
        # dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(
                days=self._offset_events[ticker_index])

        ######## Join together event dates/date-time/fields in one
        ######## data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        """Builds "name.field" or "name-event.field" column names."""
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        """Returns a DataFrame of every event name with each of its
        release date/times."""
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        # DataFrame.append returned a *new* frame (and was removed in
        # pandas 2.x), so the original loop discarded every row;
        # collect the rows first and build the frame once instead
        rows = []

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                rows.append({'event-name': event,
                             'release-date-time-full': time})

        data_frame = pandas.DataFrame(data=rows, columns=columns)

        return data_frame

    def get_all_economic_events(self):
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names
                       if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # Sort list alphabetically (and remove any duplicates); the
        # original list(set(...)) was unordered, despite the comment
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        # Field must not carry a leading "." because
        # create_event_desciptor_field already inserts the separator
        # (the original passed ".release-dt", yielding "..release-dt")
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(
            self, data_frame_in, name, event, start, end, lagged=False,
            NYC_cutoff=10):

        # Get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self) \
            .get_economic_event_ret_over_custom_event_day(
                data_frame_in, event_dates, name, event, start, end,
                lagged=lagged, NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event,
                                              start, end,
                                              realised=False):

        return self.get_economic_event_ret_over_custom_event_day(
            vol_in, name, event, start, end, lagged=realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # Return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross,
                                      event_fx, event_name, start, end,
                                      vol, mins=3 * 60, min_offset=0,
                                      create_index=False,
                                      resample=False, freq='minutes'):

        ef_time_frame = self.get_economic_event_date_time_dataframe(
            event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(
            start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(
            data_frame_rets, ef_time_frame, vol, mins=mins,
            min_offset=min_offset, create_index=create_index,
            resample=resample, freq=freq)  # , start, end)

    def get_surprise_against_intraday_moves_over_event(
            self, data_frame_cross_orig, cross, event_fx, event_name,
            start, end, offset_list=[1, 5, 30, 60], add_surprise=False,
            surprise_field='survey-average'):

        fields = ['actual-release', 'survey-median', 'survey-average',
                  'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(
            fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(
            start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx,
            event_name, start, end, offset_list=offset_list,
            add_surprise=add_surprise, surprise_field=surprise_field)
def fetch_market_data(self, market_data_request, kill_session=True):
    """Loads time series from specified data provider.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    tickers = market_data_request.tickers
    data_vendor = self.get_data_vendor(market_data_request.data_source)

    # Check if tickers have been specified (if not load all of them for
    # a category); also handle single tickers/list tickers
    create_tickers = False

    if tickers is None:
        create_tickers = True
    elif isinstance(tickers, str):
        if tickers == '':
            create_tickers = True
    elif isinstance(tickers, list):
        if tickers == []:
            create_tickers = True

    if create_tickers:
        market_data_request.tickers = ConfigManager().get_instance() \
            .get_tickers_list_for_category(
                market_data_request.category,
                market_data_request.data_source,
                market_data_request.freq,
                market_data_request.cut)

    # Intraday or tick: only one ticker per cache file
    if market_data_request.freq in ['intraday', 'tick', 'second',
                                    'hour', 'minute']:
        data_frame_agg = self.download_intraday_tick(
            market_data_request, data_vendor)

    # Daily: multiple tickers per cache file - assume we make one API
    # call to vendor library
    else:
        data_frame_agg = self.download_daily(market_data_request,
                                             data_vendor)

    if 'internet_load' in market_data_request.cache_algo:
        self.logger.debug("Internet loading.. ")

        # Signal to data_vendor template to exit session
        # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

    if market_data_request.cache_algo == 'cache_algo':
        self.logger.debug(
            "Only caching data in memory, do not return any time series.")
        return

    # Only return time series if specified in the algo
    if 'return' in market_data_request.cache_algo:
        # Special case for events/events-dt which is not indexed like
        # other tables
        if market_data_request.category is not None:
            if 'events' in market_data_request.category:
                return data_frame_agg

        try:
            return self.filter.filter_time_series(market_data_request,
                                                  data_frame_agg,
                                                  pad_columns=True)
        except Exception:
            import traceback

            self.logger.error(traceback.format_exc())

            return None
# findatapy has predefined tickers and categories in the conf folder in # CSV files which you can modify, that # map from tickers/categories to vendor tickers/vendor fields # # time_series_categories_fields.csv # time_series_fields_list.csv # time_series_tickers_list.csv # # You can also add you own files with tickers, like we have done for FX vol # like fx_vol_tickers.csv # # Having these ticker mappings makes it easier to mix and match data from # different data sources, and saves us # having to remember vendor specific tickers cm = ConfigManager().get_instance() # Get all the categories for raw data (note this won't include generated # categories like fx-vol-market, # which aggregate from many other categories) categories = list(cm.get_categories_from_tickers()) print(categories) # Filter those categories which include quandl quandl_category = [x for x in categories if 'quandl' in x] print(quandl_category[0]) # For this category, get all the tickers and fields which are available tickers = cm.get_tickers_list_for_category_str(categories[0])