def get_economic_data_history(self, start_date, finish_date, country_group,
                              data_type, source='alfred',
                              cache_algo="internet_load_return"):
    """Fetches a history of economic data for a group of countries.

    Parameters
    ----------
    start_date, finish_date : str or datetime
        Date range of the download
    country_group : str or list
        Either the name of a country group (looked up in the country
        group configuration table) or an explicit list of country names
    data_type : str
        Economic data type suffix, eg. 'GDP'
    source : str
        Data vendor to use (default 'alfred')
    cache_algo : str
        How the underlying generator caches/returns data

    Returns
    -------
    pandas.DataFrame
    """
    logger = LoggerManager().getLogger(__name__)

    # A list argument is taken as the country names directly; otherwise
    # resolve every country belonging to the named group
    if isinstance(country_group, list):
        country_names = country_group
    else:
        group_rows = self._econ_country_groups[
            self._econ_country_groups["Country Group"] == country_group]
        country_names = list(group_rows['Country'])

    # Build the "pretty" tickers, eg. 'US-GDP'
    pretty_tickers = ['-'.join([name, data_type]) for name in country_names]

    # Map each pretty ticker onto the vendor's own ticker (None where no
    # mapping exists, which is logged as an error)
    vendor_tickers = []

    for full_code in pretty_tickers:
        matches = list(self._all_econ_tickers[
            self._all_econ_tickers["Full Code"] == full_code][source].values)

        if not matches:
            logger.error('Could not find match for ' + full_code)
            vendor_tickers.append(None)
        else:
            vendor_tickers.append(matches[0])

    # Bloomberg uses its own field naming convention
    vendor_fields = ['PX_LAST'] if source == 'bloomberg' else ['close']

    md_request = MarketDataRequest(
        start_date=start_date,
        finish_date=finish_date,
        category='economic',
        freq='daily',
        data_source=source,
        cut='LOC',
        tickers=pretty_tickers,
        fields=['close'],
        vendor_tickers=vendor_tickers,
        vendor_fields=vendor_fields,
        cache_algo=cache_algo)

    return self.market_data_generator.fetch_market_data(md_request)
def translate_from_vendor_field(self, vendor_fields_list, md_request):
    """Converts all the fields from vendor fields to findatapy fields.

    Parameters
    ----------
    vendor_fields_list : str or list(str)
        Vendor field names to convert
    md_request : MarketDataRequest
        contains all the various parameters detailing time series start
        and finish, tickers etc

    Returns
    -------
    List of Strings
        Converted field names; when an explicit vendor_fields mapping is
        supplied, unknown vendor fields pass through unchanged; when the
        stored configuration is used, unknown fields fall back to 'close'.
    """
    data_source = md_request.data_source

    if isinstance(vendor_fields_list, str):
        vendor_fields_list = [vendor_fields_list]

    fields_converted = []

    # If the user supplied an explicit vendor->library field mapping on
    # the request, use that (lookup is case-insensitive)
    if md_request.vendor_fields is not None:
        dictionary = dict(zip(self.get_lower_case_list(
            md_request.vendor_fields), md_request.fields))

        for vendor_field in vendor_fields_list:
            try:
                fields_converted.append(dictionary[vendor_field.lower()])
            except KeyError:
                # No mapping for this field: keep the vendor name as-is
                fields_converted.append(vendor_field)

    # Otherwise used stored configuration files (every field needs to be
    # defined!)
    else:
        for vendor_field in vendor_fields_list:
            try:
                v = self.config.convert_vendor_to_library_field(
                    data_source, vendor_field)
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find field conversion, did you type it "
                    "correctly: " + vendor_field
                    + ", using 'close' as default.")

                v = 'close'

            fields_converted.append(v)

    return fields_converted
def translate_from_vendor_ticker(self, vendor_tickers_list, market_data_request):
    """Converts all the fields from vendor tickers to findatapy tickers.

    Parameters
    ----------
    vendor_tickers_list : str or list(str)
        Vendor ticker names to convert
    market_data_request : MarketDataRequest
        contains all the various parameters detailing time series start
        and finish, tickers etc

    Returns
    -------
    List of Strings, or None if a stored-configuration lookup fails
    """
    # An explicit vendor->library mapping on the request takes precedence
    # (lookup is case-insensitive; a vendor ticker absent from the
    # mapping raises KeyError, as before)
    if market_data_request.vendor_tickers is not None:
        dictionary = dict(
            zip(
                self.get_lower_case_list(
                    market_data_request.vendor_tickers),
                market_data_request.tickers))

        tickers_stuff = []

        for vendor_ticker in vendor_tickers_list:
            tickers_stuff.append(dictionary[vendor_ticker.lower()])

        return tickers_stuff

    data_source = market_data_request.data_source

    if isinstance(vendor_tickers_list, str):
        vendor_tickers_list = [vendor_tickers_list]

    if self.config is None:
        return vendor_tickers_list

    tickers_converted = []

    for vendor_ticker in vendor_tickers_list:
        try:
            v = self.config.convert_vendor_to_library_ticker(
                data_source, vendor_ticker)
        except Exception:
            logger = LoggerManager().getLogger(__name__)
            logger.error(
                "Couldn't find ticker conversion, did you type it correctly: "
                + vendor_ticker)

            # Preserves original behavior: abandon conversion and return
            # None on the first failure
            return

        tickers_converted.append(v)

    return tickers_converted
def translate_to_vendor_ticker(self, md_request):
    """Converts all the tickers from findatapy tickers to vendor tickers.

    Parameters
    ----------
    md_request : MarketDataRequest
        contains all the various parameters detailing time series start
        and finish, tickers etc

    Returns
    -------
    List of Strings, or None if a stored-configuration lookup fails
    """
    # Explicit vendor tickers on the request short-circuit any lookup
    if md_request.vendor_tickers is not None:
        return md_request.vendor_tickers

    category = md_request.category
    source = md_request.data_source
    freq = md_request.freq
    cut = md_request.cut
    tickers_list = md_request.tickers

    if isinstance(tickers_list, str):
        tickers_list = [tickers_list]

    if self.config is None:
        return tickers_list

    tickers_list_converted = []

    for ticker in tickers_list:
        try:
            t = self.config.convert_library_to_vendor_ticker(
                category, source, freq, cut, ticker)
        except Exception:
            logger = LoggerManager().getLogger(__name__)
            logger.error(
                "Couldn't find ticker conversion, did you type "
                "it correctly: " + ticker)

            # Preserves original behavior: abandon conversion and return
            # None on the first failure
            return

        tickers_list_converted.append(t)

    return tickers_list_converted
class DataVendorQuandl(DataVendor):
    """Reads daily time series from Quandl, translating the returned
    vendor tickers/fields back into findatapy conventions."""

    def __init__(self):
        super(DataVendorQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """Downloads the requested series and renames the columns to
        "<ticker>.<field>" in findatapy terms.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            describes tickers, fields and date range to download

        Returns
        -------
        pandas.DataFrame or None
        """
        market_data_request_vendor = \
            self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(market_data_request_vendor)

        # FIX: the original tested "data_frame.index is []", which is an
        # identity comparison against a fresh list and is always False;
        # genuinely test for an empty index instead
        if data_frame is None or len(data_frame.index) == 0:
            return None

        returned_tickers = data_frame.columns

        # Tidy up tickers into a format that is more easily translatable;
        # we can often get multiple fields returned (even if we don't ask
        # for them!), so convert to lower case
        returned_fields = [(x.split(' - ')[1]).lower()
                           .replace(' ', '-').replace('.', '-')
                           .replace('--', '-')
                           for x in returned_tickers]

        # Special case for close
        returned_fields = [x.replace('value', 'close')
                           for x in returned_fields]

        # Replace time fields (can cause problems later for times that
        # start with 0, eg. "09:00" -> "9:00")
        for i in range(0, 10):
            returned_fields = [
                x.replace('0' + str(i) + ':00', str(i) + ':00')
                for x in returned_fields]

        returned_tickers = [x.replace('.', '/') for x in returned_tickers]
        returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

        try:
            fields = self.translate_from_vendor_field(
                returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, market_data_request)
        except Exception:
            # FIX: previously this printed 'error' and then crashed later
            # with a NameError on 'fields'; log properly and propagate
            self.logger.error("Could not translate vendor tickers/fields")
            raise

        ticker_combined = [tickers[i] + "." + fields[i]
                           for i in range(0, len(fields))]

        data_frame.columns = ticker_combined
        data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl for "
                         + str(ticker_combined))

        return data_frame

    def download_daily(self, market_data_request):
        """Calls Quandl with up to 5 retries; returns None if every
        attempt fails."""
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                data_frame = Quandl.get(
                    market_data_request.tickers,
                    authtoken=DataConstants().quandl_api_key,
                    trim_start=market_data_request.start_date,
                    trim_end=market_data_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempting... " + str(trials)
                                 + " request to download from Quandl")

        if trials == 5:
            self.logger.error(
                "Couldn't download from Quandl after several attempts!")

        return data_frame
class DataVendorBOE(DataVendor):
    """Reads time series from the Bank of England (the actual download in
    download_daily is still a TODO stub)."""

    def __init__(self):
        super(DataVendorBOE, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """Downloads the requested series and renames the columns to
        "<ticker>.<field>" in findatapy terms.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            describes tickers, fields and date range to download

        Returns
        -------
        pandas.DataFrame or None
        """
        market_data_request_vendor = \
            self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request BOE data")

        data_frame = self.download_daily(market_data_request_vendor)

        # FIX: the original tested "data_frame.index is []", which is an
        # identity comparison against a fresh list and is always False;
        # genuinely test for an empty index instead
        if data_frame is None or len(data_frame.index) == 0:
            return None

        returned_tickers = data_frame.columns

        # Tidy up tickers into a format that is more easily translatable;
        # we can often get multiple fields returned (even if we don't ask
        # for them!), so convert to lower case
        returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-')
                           for x in returned_tickers]

        # Special case for close
        returned_fields = [x.replace('value', 'close')
                           for x in returned_fields]

        returned_tickers = [x.replace('.', '/') for x in returned_tickers]
        returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

        fields = self.translate_from_vendor_field(
            returned_fields, market_data_request)
        tickers = self.translate_from_vendor_ticker(
            returned_tickers, market_data_request)

        ticker_combined = [tickers[i] + "." + fields[i]
                           for i in range(0, len(fields))]

        data_frame.columns = ticker_combined
        data_frame.index.name = 'Date'

        self.logger.info("Completed request from BOE.")

        return data_frame

    def download_daily(self, market_data_request):
        """Placeholder retry loop; the BOE download itself is not yet
        implemented, so this currently always returns None."""
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                # TODO implement the actual BOE download here
                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempting... " + str(trials)
                                 + " request to download from BOE")

        if trials == 5:
            # FIX: message previously said "ONS" (copy-paste from the ONS
            # vendor class)
            self.logger.error(
                "Couldn't download from BOE after several attempts!")

        return data_frame
class DataVendorALFRED(DataVendor):
    """Reads economic time series from ALFRED/FRED, supporting the close,
    actual-release, first-revision and release-date-time-full fields."""

    def __init__(self):
        super(DataVendorALFRED, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """Downloads the requested series and renames the columns to
        "<ticker>.<field>" in findatapy terms.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            describes tickers, fields and date range to download

        Returns
        -------
        pandas.DataFrame or None
        """
        market_data_request_vendor = \
            self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request ALFRED/FRED data")

        data_frame = self.download_daily(market_data_request_vendor)

        # FIX: the original tested "data_frame.index is []", which is an
        # identity comparison against a fresh list and is always False;
        # genuinely test for an empty index instead
        if data_frame is None or len(data_frame.index) == 0:
            return None

        # Columns come back from download_daily as "<ticker>.<field>"
        returned_tickers = data_frame.columns
        returned_fields = [(x.split('.')[1]) for x in returned_tickers]
        returned_tickers = [(x.split('.')[0]) for x in returned_tickers]

        try:
            fields = self.translate_from_vendor_field(
                returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, market_data_request)
        except Exception:
            # FIX: previously this printed 'error' and then crashed later
            # with a NameError on 'fields'; log properly and propagate
            self.logger.error("Could not translate vendor tickers/fields")
            raise

        ticker_combined = [tickers[i] + "." + fields[i]
                           for i in range(0, len(fields))]

        data_frame.columns = ticker_combined
        data_frame.index.name = 'Date'

        self.logger.info("Completed request from ALFRED/FRED for "
                         + str(ticker_combined))

        return data_frame

    def download_daily(self, market_data_request):
        """Downloads every requested ticker (each with up to 5 retries)
        and outer-joins the resulting frames."""
        data_frame_list = []
        data_frame_release = []

        for i in range(0, len(market_data_request.tickers)):
            # FIX: the retry budget is now reset per ticker; previously a
            # single failing ticker exhausted the retries for all
            # subsequent tickers
            trials = 0

            while trials < 5:
                try:
                    self._download_single_ticker(
                        market_data_request, i,
                        data_frame_list, data_frame_release)
                    break
                except Exception:
                    trials = trials + 1
                    self.logger.info(
                        "Attempting... " + str(trials)
                        + " request to download from ALFRED/FRED")

            if trials == 5:
                self.logger.error(
                    "Couldn't download from ALFRED/FRED after "
                    "several attempts!")

        calc = Calculations()

        data_frame1 = calc.pandas_outer_join(data_frame_list)
        data_frame2 = calc.pandas_outer_join(data_frame_release)

        return pandas.concat([data_frame1, data_frame2], axis=1)

    def _download_single_ticker(self, market_data_request, i,
                                data_frame_list, data_frame_release):
        """Downloads the requested field combinations for one ticker,
        appending the resulting frames to the supplied lists."""
        fred = Fred(api_key=DataConstants().fred_api_key)

        ticker = market_data_request.tickers[i]
        fields = market_data_request.fields

        # Renamed from 'filter' to avoid shadowing the builtin
        date_filter = Filter()

        # Acceptable fields: close, actual-release, first-revision,
        # release-date-time-full
        if 'close' in fields and 'release-date-time-full' in fields:
            data_frame = fred.get_series_all_releases(
                ticker,
                observation_start=market_data_request.start_date,
                observation_end=market_data_request.finish_date)

            data_frame.columns = [
                'Date', ticker + '.release-date-time-full',
                ticker + '.close']

            # Keep the *last* release per observation date
            data_frame = data_frame.sort_values(
                by=['Date', ticker + '.release-date-time-full'])
            data_frame = data_frame.drop_duplicates(
                subset=['Date'], keep='last')
            data_frame = data_frame.set_index(['Date'])

            data_frame = date_filter.filter_time_series_by_date(
                market_data_request.start_date,
                market_data_request.finish_date, data_frame)

            data_frame_list.append(data_frame)
        elif 'close' in fields:
            data_frame = fred.get_series(
                series_id=ticker,
                observation_start=market_data_request.start_date,
                observation_end=market_data_request.finish_date)

            data_frame = pandas.DataFrame(data_frame)
            data_frame.columns = [ticker + '.close']

            data_frame_list.append(data_frame)

        if 'first-revision' in fields:
            data_frame = fred.get_series_first_revision(
                ticker,
                observation_start=market_data_request.start_date,
                observation_end=market_data_request.finish_date)

            data_frame = pandas.DataFrame(data_frame)
            data_frame.columns = [ticker + '.first-revision']

            data_frame = date_filter.filter_time_series_by_date(
                market_data_request.start_date,
                market_data_request.finish_date, data_frame)

            data_frame_list.append(data_frame)

        if 'actual-release' in fields and 'release-date-time-full' in fields:
            data_frame = fred.get_series_all_releases(
                ticker,
                observation_start=market_data_request.start_date,
                observation_end=market_data_request.finish_date)

            data_frame.columns = [
                'Date', ticker + '.release-date-time-full',
                ticker + '.actual-release']

            # Keep the *first* release per observation date
            data_frame = data_frame.sort_values(
                by=['Date', ticker + '.release-date-time-full'])
            data_frame = data_frame.drop_duplicates(
                subset=['Date'], keep='first')
            data_frame = data_frame.set_index(['Date'])

            data_frame = date_filter.filter_time_series_by_date(
                market_data_request.start_date,
                market_data_request.finish_date, data_frame)

            data_frame_list.append(data_frame)
        elif 'actual-release' in fields:
            data_frame = fred.get_series_first_release(
                ticker,
                observation_start=market_data_request.start_date,
                observation_end=market_data_request.finish_date)

            data_frame = pandas.DataFrame(data_frame)
            data_frame.columns = [ticker + '.actual-release']

            data_frame = date_filter.filter_time_series_by_date(
                market_data_request.start_date,
                market_data_request.finish_date, data_frame)

            data_frame_list.append(data_frame)
        elif 'release-date-time-full' in fields:
            data_frame = fred.get_series_all_releases(
                ticker,
                observation_start=market_data_request.start_date,
                observation_end=market_data_request.finish_date)

            data_frame = data_frame['realtime_start']
            data_frame = pandas.DataFrame(data_frame)
            data_frame.columns = [ticker + '.release-date-time-full']
            data_frame.index = data_frame[
                ticker + '.release-date-time-full']

            # FIX: DataFrame.sort() has been removed from pandas; the
            # intent here is to sort by the (release datetime) index
            data_frame = data_frame.sort_index()
            data_frame = data_frame.drop_duplicates()

            data_frame_release.append(
                date_filter.filter_time_series_by_date(
                    market_data_request.start_date,
                    market_data_request.finish_date, data_frame))
class MarketDataGenerator(object):
    """Returns market data time series by directly calling market data
    sources.

    At present it supports Bloomberg (bloomberg), Yahoo (yahoo), Quandl
    (quandl), FRED (fred) etc. which are implemented in subclasses of
    DataVendor class. This provides a common wrapper for all these data
    sources.
    """

    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1
        self.days_expired_intraday_contract_download = -1

    def set_intraday_code(self, code):
        # Used when intraday downloads are specific to a contract code
        self._intraday_code = code

    def get_data_vendor(self, source):
        """Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo",
            "google", "fred" etc. we can also have forms like
            "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """
        data_vendor = None

        # Only the part before any hyphen identifies the vendor
        try:
            source = source.split("-")[0]
        except Exception:
            self.logger.error("Was data source specified?")
            return None

        # Imports are deliberately lazy, so optional vendors (eg.
        # Bloomberg) are only needed when actually requested
        if source == 'bloomberg':
            try:
                from findatapy.market.datavendorbbg import DataVendorBBGOpen
                data_vendor = DataVendorBBGOpen()
            except Exception:
                self.logger.warn("Bloomberg needs to be installed")
        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()
        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()
        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()
        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()
        elif source == 'fxcm':
            from findatapy.market.datavendorweb import DataVendorFXCM
            data_vendor = DataVendorFXCM()
        elif source == 'alfred':
            from findatapy.market.datavendorweb import DataVendorALFRED
            data_vendor = DataVendorALFRED()
        elif source == 'yahoo':
            from findatapy.market.datavendorweb import DataVendorYahoo
            data_vendor = DataVendorYahoo()
        elif source in ['google', 'fred', 'oecd', 'eurostat', 'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()
        elif source == 'bitcoincharts':
            from findatapy.market.datavendorweb import DataVendorBitcoincharts
            data_vendor = DataVendorBitcoincharts()
        elif source == 'poloniex':
            from findatapy.market.datavendorweb import DataVendorPoloniex
            data_vendor = DataVendorPoloniex()
        elif source == 'binance':
            from findatapy.market.datavendorweb import DataVendorBinance
            data_vendor = DataVendorBinance()
        elif source == 'bitfinex':
            from findatapy.market.datavendorweb import DataVendorBitfinex
            data_vendor = DataVendorBitfinex()
        elif source == 'gdax':
            from findatapy.market.datavendorweb import DataVendorGdax
            data_vendor = DataVendorGdax()
        elif source == 'kraken':
            from findatapy.market.datavendorweb import DataVendorKraken
            data_vendor = DataVendorKraken()
        elif source == 'bitmex':
            from findatapy.market.datavendorweb import DataVendorBitmex
            data_vendor = DataVendorBitmex()
        elif '.csv' in source or '.h5' in source:
            from findatapy.market.datavendorweb import DataVendorFlatFile
            data_vendor = DataVendorFlatFile()
        elif source == 'alphavantage':
            from findatapy.market.datavendorweb import DataVendorAlphaVantage
            data_vendor = DataVendorAlphaVantage()
        elif source == 'huobi':
            from findatapy.market.datavendorweb import DataVendorHuobi
            data_vendor = DataVendorHuobi()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        # data_vendor = self.get_data_vendor(market_data_request.data_source)

        # Check if tickers have been specified (if not load all of them
        # for a category); also handle single tickers/list tickers
        create_tickers = False

        if market_data_request.vendor_tickers is not None \
                and market_data_request.tickers is None:
            market_data_request.tickers = market_data_request.vendor_tickers

        tickers = market_data_request.tickers

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            market_data_request.tickers = \
                ConfigManager().get_instance().get_tickers_list_for_category(
                    market_data_request.category,
                    market_data_request.data_source,
                    market_data_request.freq,
                    market_data_request.cut)

        # Intraday or tick: only one ticker per cache file
        if market_data_request.freq in [
                'intraday', 'tick', 'second', 'hour', 'minute']:
            data_frame_agg = self.download_intraday_tick(market_data_request)
        # Daily: multiple tickers per cache file - assume we make one API
        # call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request)

        if 'internet_load' in market_data_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # Signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True:
            #     data_vendor.kill_session()

        if market_data_request.cache_algo == 'cache_algo':
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        # Only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # Special case for events/events-dt which is not indexed like
            # other tables (also same for downloading futures contracts
            # dates)
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            # Pad columns a second time (is this necessary to do here
            # again?)
            # TODO only do this for not daily data?
            try:
                if data_frame_agg is not None:
                    data_frame_agg = self.filter.filter_time_series(
                        market_data_request, data_frame_agg,
                        pad_columns=True).dropna(how='all')

                    # Resample data using pandas if specified in the
                    # MarketDataRequest
                    if market_data_request.resample is not None:
                        if 'last' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.resample(
                                market_data_request.resample).last()
                        elif 'first' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.resample(
                                market_data_request.resample).first()

                        if 'dropna' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.dropna(how='all')
                else:
                    self.logger.warn("No data returned for "
                                     + str(market_data_request.tickers))

                return data_frame_agg
            except Exception as e:
                # FIX: previously printed to stdout; log instead
                self.logger.error(str(e))

                if data_frame_agg is not None:
                    return data_frame_agg

                self.logger.warn("No data returned for "
                                 + str(market_data_request.tickers))

                return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        str
        """
        if isinstance(ticker, list):
            ticker = ticker[0]

        return self.create_cache_file_name(
            MarketDataRequest().create_category_key(
                market_data_request, ticker))

    def download_intraday_tick(self, market_data_request):
        """Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        data_frame_agg = None

        ticker_cycle = 0

        data_frame_group = []

        # Single threaded version: handle intraday ticker calls separately
        # one by one
        if len(market_data_request.tickers) == 1 or DataConstants(
                ).market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # FIX: previously fetched with the *whole* request instead
                # of the single-ticker request built just above
                data_frame_single = self.fetch_single_time_series(
                    market_data_request_single)

                # If the vendor doesn't provide any data, don't attempt to
                # append
                if data_frame_single is not None:
                    if data_frame_single.empty == False:
                        data_frame_single.index.name = 'Date'

                        # We downscale into float32, to avoid memory
                        # problems in Python (32 bit); data is stored on
                        # disk as float32 anyway
                        data_frame_single = \
                            data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

            # If you call for returning multiple tickers, be careful with
            # memory considerations!
            if data_frame_group is not None:
                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg
        else:
            market_data_request_list = []

            # Create a list of single-ticker MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):
        """Fetches one request, first dropping any tickers whose expiry is
        before the requested start date."""
        market_data_request = MarketDataRequest(
            md_request=market_data_request)

        # Only includes those tickers that have not expired yet!
        start_date = pandas.Timestamp(market_data_request.start_date).date()

        import datetime
        from datetime import timedelta

        current_date = datetime.datetime.utcnow().date()

        tickers = market_data_request.tickers
        vendor_tickers = market_data_request.vendor_tickers

        expiry_date = market_data_request.expiry_date

        config = ConfigManager().get_instance()

        # In many cases no expiry is defined so skip them
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(
                    market_data_request.data_source, tickers[i])
            except Exception:
                pass

            if expiry_date is not None:
                # Use pandas Timestamp, a bit more robust with weird dates
                # (can fail if comparing date vs datetime)
                expiry_date = pandas.Timestamp(expiry_date).date()

                # If the expiry is before the start date of our download
                # don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # Special case for futures-contracts which are intraday:
                # avoid downloading if the expiry date is very far in the
                # past (we need this because there might be odd situations
                # where we run on an expiry date, but still want to get
                # data right till expiry time)
                if market_data_request.category == 'futures-contracts' \
                        and market_data_request.freq == 'intraday' \
                        and self.days_expired_intraday_contract_download > 0:
                    if expiry_date + timedelta(
                            days=self.days_expired_intraday_contract_download
                            ) < current_date:
                        tickers[i] = None

            if vendor_tickers is not None and tickers[i] is None:
                vendor_tickers[i] = None

        market_data_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            market_data_request.vendor_tickers = [
                e for e in vendor_tickers if e is not None]

        data_frame_single = None

        if len(market_data_request.tickers) > 0:
            data_frame_single = self.get_data_vendor(
                market_data_request.data_source).load_ticker(
                    market_data_request)

        if data_frame_single is not None:
            if data_frame_single.empty == False:
                data_frame_single.index.name = 'Date'

                # Will fail for dataframes which include dates/strings
                # (eg. futures contract names)
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    self.logger.warning('Could not convert to float')

                if market_data_request.freq == "second":
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):
        """Fetches a list of requests (in parallel where configured) and
        outer-joins the results."""
        data_frame_agg = None

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants(
                ).market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = SwimPool().create_pool(
                thread_technique=DataConstants().market_thread_technique,
                thread_no=thread_no)

            # Open the market data downloads in their own threads and
            # return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # Collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            if data_frame_group is not None:
                try:
                    data_frame_agg = self.calculations.pandas_outer_join(
                        data_frame_group)
                except Exception as e:
                    self.logger.warning(
                        'Possible overlap of columns? Have you specifed '
                        'same ticker several times: ' + str(e))

        return data_frame_agg

    def download_daily(self, market_data_request):
        """Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        key = MarketDataRequest().create_category_key(market_data_request)

        is_key_overriden = False

        for k in DataConstants().override_multi_threading_for_categories:
            if k in key:
                is_key_overriden = True
                break

        # By default use 'other'
        thread_no = DataConstants().market_thread_no['other']

        if market_data_request.data_source in \
                DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request.data_source]

        # Daily data does not include ticker in the key, as multiple
        # tickers live in the same file
        if thread_no == 1:
            data_frame_agg = self.fetch_single_time_series(
                market_data_request)
        else:
            market_data_request_list = []

            # Guard against a zero group size for small ticker lists
            group_size = max(
                int(len(market_data_request.tickers) / thread_no - 1), 0)

            if group_size == 0:
                group_size = 1

            # Split up tickers into groups related to number of threads to
            # call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = \
                    market_data_request.tickers[i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[
                            i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            # Special case where we make smaller calls one after the other
            if is_key_overriden:
                data_frame_list = []

                for md in market_data_request_list:
                    data_frame_list.append(
                        self.fetch_single_time_series(md))

                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_list)
            else:
                data_frame_agg = self.fetch_group_time_series(
                    market_data_request_list)

        return data_frame_agg

    def refine_expiry_date(self, market_data_request):
        """Fills in the expiry date from configuration when it has not
        been supplied on the request."""
        if market_data_request.expiry_date is None:
            # FIX: previously the looked-up value was discarded; store it
            # on the request.
            # NOTE(review): keeps the original 'ticker' attribute access —
            # confirm against MarketDataRequest (elsewhere 'tickers' is
            # used)
            market_data_request.expiry_date = \
                ConfigManager().get_instance().get_expiry_for_ticker(
                    market_data_request.data_source,
                    market_data_request.ticker)

        return market_data_request

    def create_cache_file_name(self, filename):
        # Cache files live under the configured time series data folder
        return DataConstants().folder_time_series_data + "/" + filename
class HistEconDataFactory(object):
    """Fetches economic data histories, translating between findatapy
    "pretty" tickers (e.g. 'US-GDP') and vendor ticker codes via the
    CSV mapping tables referenced in DataConstants.
    """

    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)

        # mapping tables between pretty tickers/countries and vendor codes
        self._all_econ_tickers = pandas.read_csv(
            DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(
            DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(
            DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator

    def get_economic_data_history(self, start_date, finish_date,
                                  country_group, data_type,
                                  source='fred',
                                  cache_algo="internet_load_return"):
        """Downloads economic data for a group of countries.

        Parameters
        ----------
        start_date, finish_date : str or datetime
            period to download
        country_group : str or list
            name of a country group defined in the country-groups CSV, or
            an explicit list of country names
        data_type : str
            economic series suffix, e.g. 'GDP' (combined as 'US-GDP')
        source : str
            data vendor (default 'fred')
        cache_algo : str
            caching behaviour passed through to MarketDataRequest

        Returns
        -------
        pandas.DataFrame
        """
        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group]
                ['Country'])

        # construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(self._all_econ_tickers[
                self._all_econ_tickers["Full Code"] == pretty_ticker]
                [source].values)

            if vendor_ticker == []:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg':
            vendor_fields = ['PX_LAST']

        md_request = MarketDataRequest(
            start_date=start_date,          # start date
            finish_date=finish_date,        # finish date
            category='economic',
            freq='daily',                   # daily data
            data_source=source,             # which data vendor to use
            cut='LOC',
            tickers=pretty_tickers,
            fields=['close'],               # which fields to download
            vendor_tickers=vendor_tickers,
            vendor_fields=vendor_fields,    # vendor-specific field names
            cache_algo=cache_algo)          # how to return data

        return self.market_data_generator.fetch_market_data(md_request)

    def grasp_coded_entry(self, df, index):
        """Stacks the economic data frame from *index* onwards into long
        format and attaches numeric country codes.

        Parameters
        ----------
        df : pandas.DataFrame
            wide frame whose columns are pretty tickers ('US-GDP' etc.)
        index : label
            first index label to include

        Returns
        -------
        pandas.DataFrame
            columns ['Date', 'Name', 'Val', 'Code']
        """
        # .ix was removed from pandas; .loc is the label-based equivalent
        df = df.loc[index:].stack()
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']
        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum(
            [list(self._econ_country_codes[
                self._econ_country_codes["Country"] == x]['Code'])
             for x in countries], [])

        return df
class HistEconDataFactory(object):
    """Fetches economic data histories, translating between findatapy
    "pretty" tickers (e.g. 'US-GDP') and vendor ticker codes via the
    CSV mapping tables referenced in DataConstants.
    """

    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)

        # mapping tables between pretty tickers/countries and vendor codes
        self._all_econ_tickers = pandas.read_csv(
            DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(
            DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(
            DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator

    def get_economic_data_history(self, start_date, finish_date,
                                  country_group, data_type,
                                  source='fred',
                                  cache_algo="internet_load_return"):
        """Downloads economic data for a group of countries.

        Parameters
        ----------
        start_date, finish_date : str or datetime
            period to download
        country_group : str or list
            name of a country group defined in the country-groups CSV, or
            an explicit list of country names
        data_type : str
            economic series suffix, e.g. 'GDP' (combined as 'US-GDP')
        source : str
            data vendor (default 'fred')
        cache_algo : str
            caching behaviour passed through to MarketDataRequest

        Returns
        -------
        pandas.DataFrame
        """
        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group]
                ['Country'])

        # construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(self._all_econ_tickers[
                self._all_econ_tickers["Full Code"] == pretty_ticker]
                [source].values)

            if vendor_ticker == []:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg':
            vendor_fields = ['PX_LAST']

        md_request = MarketDataRequest(
            start_date=start_date,          # start date
            finish_date=finish_date,        # finish date
            category='economic',
            freq='daily',                   # daily data
            data_source=source,             # which data vendor to use
            cut='LOC',
            tickers=pretty_tickers,
            fields=['close'],               # which fields to download
            vendor_tickers=vendor_tickers,
            vendor_fields=vendor_fields,    # vendor-specific field names
            cache_algo=cache_algo)          # how to return data

        return self.market_data_generator.fetch_market_data(md_request)

    def grasp_coded_entry(self, df, index):
        """Stacks the economic data frame from *index* onwards into long
        format and attaches numeric country codes.

        Parameters
        ----------
        df : pandas.DataFrame
            wide frame whose columns are pretty tickers ('US-GDP' etc.)
        index : label
            first index label to include

        Returns
        -------
        pandas.DataFrame
            columns ['Date', 'Name', 'Val', 'Code']
        """
        # .ix was removed from pandas; .loc is the label-based equivalent
        df = df.loc[index:].stack()
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']
        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum(
            [list(self._econ_country_codes[
                self._econ_country_codes["Country"] == x]['Code'])
             for x in countries], [])

        return df
class MarketDataGenerator(object):
    """Loads market data time series from the various data vendor classes,
    splitting requests across threads/processes where configured, and keeps
    an in-memory cache of downloaded daily data.
    """

    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1

        return

    def flush_cache(self):
        """Flushes the internal cache of time series.

        NOTE(review): this rebinds an *instance* attribute which shadows the
        shared class-level cache rather than clearing it for every
        instance -- pre-existing behaviour, preserved.
        """
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        # vendor-specific code used for intraday requests
        self._intraday_code = code

    def get_data_vendor(self, source):
        """Loads appropriate data service class.

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo",
            "google", "fred" etc. we can also have forms like
            "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
            vendor instance, or None when the source is unrecognized
        """
        data_vendor = None

        # "bloomberg-boe" style names: only the prefix selects the vendor
        source = source.split("-")[0]

        # imports are local so optional vendor dependencies are only
        # needed when that vendor is actually used
        if source == 'bloomberg':
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()
        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()
        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()
        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()
        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()
        elif source in ['yahoo', 'google', 'fred', 'oecd', 'eurostat',
                        'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """Loads time series from specified data provider.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be
            fetched, including ticker, start & finish date etc.
        kill_session : bool
            kept for interface compatibility (session shutdown is
            currently commented out below)

        Returns
        -------
        pandas.DataFrame or None
        """
        tickers = market_data_request.tickers
        data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them
        # for a category); also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '':
                create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []:
                create_tickers = True

        if create_tickers:
            market_data_request.tickers = \
                self.config.get_tickers_list_for_category(
                    market_data_request.category,
                    market_data_request.data_source,
                    market_data_request.freq,
                    market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if market_data_request.freq in ['intraday', 'tick', 'second',
                                        'hour', 'minute']:
            data_frame_agg = self.download_intraday_tick(
                market_data_request, data_vendor)
        # daily: multiple tickers per cache file - assume we make one API
        # call to vendor library
        else:
            data_frame_agg = self.download_daily(
                market_data_request, data_vendor)

        if 'internet_load' in market_data_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True:
            #     data_vendor.kill_session()

        if market_data_request.cache_algo == 'cache_algo':
            self.logger.debug(
                "Only caching data in memory, do not return any time "
                "series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like
            # other tables
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            try:
                return self.filter.filter_time_series(
                    market_data_request, data_frame_agg, pad_columns=True)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_market_data_cached(self, market_data_request):
        """Loads time series from the in-memory cache (if it exists).

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame or None
        """
        if market_data_request.freq == "intraday":
            ticker = market_data_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(market_data_request, ticker)

        if fname in self._time_series_cache:
            data_frame = self._time_series_cache[fname]

            return self.filter.filter_time_series(market_data_request,
                                                  data_frame)

        return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """Creates a hash key (cache file name) for the time series.

        Returns
        -------
        str
        """
        if isinstance(ticker, list):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(market_data_request, ticker))

    def download_intraday_tick(self, market_data_request, data_vendor):
        """Loads intraday time series from specified data provider.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be
            fetched, including ticker, start & finish date etc.
        data_vendor : DataVendor
            vendor used for single-threaded loads

        Returns
        -------
        pandas.DataFrame or None
        """
        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version: handle intraday ticker calls
        # separately one by one
        if len(market_data_request.tickers) == 1 or \
                DataConstants().market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in
                # Python (32 bit); data is stored on disk as float32 anyway
                data_frame_single = data_vendor.load_ticker(
                    market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt
                # to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = \
                            data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

            # if you call for returning multiple tickers, be careful with
            # memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calculations.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg
        else:
            market_data_request_list = []

            # create a list of MarketDataRequests (one per ticker)
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if hasattr(market_data_request, 'vendor_tickers'):
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):
        """Downloads one request via its vendor, normalising index name,
        dtype and (for 'second' frequency) resampling to 1s bars."""
        data_frame_single = self.get_data_vendor(
            market_data_request.data_source).load_ticker(market_data_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which include dates
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    pass

                if market_data_request.freq == "second":
                    # modern pandas .resample() returns a lazy Resampler;
                    # aggregate explicitly (mean was the old implicit
                    # default)
                    data_frame_single = \
                        data_frame_single.resample("1s").mean()

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):
        """Downloads a list of requests in parallel (thread/process pool)
        and outer-joins the resulting time series."""
        data_frame_agg = None

        # depends on the nature of operation as to whether we should use
        # threading or multiprocessing library
        # (fixed: compare string content with ==, not identity with 'is')
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spend waiting for Bloomberg to return,
            # so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't
            # pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in \
                DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and
            # return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group
                                if i is not None]

            if data_frame_group is not None:
                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_group)

        return data_frame_agg

    def download_daily(self, market_data_request, data_vendor):
        """Loads daily time series from specified data provider.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be
            fetched, including ticker, start & finish date etc.
        data_vendor : DataVendor
            vendor used for single-threaded loads

        Returns
        -------
        pandas.DataFrame
        """
        # daily data does not include ticker in the key, as multiple
        # tickers in the same file
        if DataConstants().market_thread_no['other'] == 1:
            data_frame_agg = data_vendor.load_ticker(market_data_request)
        else:
            market_data_request_list = []

            # clamp at 1: the old code could compute a zero or *negative*
            # group size when there were fewer tickers than threads
            # (matching the fix applied elsewhere in this file)
            group_size = max(
                int(len(market_data_request.tickers) /
                    DataConstants().market_thread_no['other'] - 1), 1)

            # split up tickers into groups related to number of threads
            # to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = \
                    market_data_request.tickers[i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[
                            i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            data_frame_agg = self.fetch_group_time_series(
                market_data_request_list)

        key = self.create_category_key(market_data_request)
        fname = self.create_cache_file_name(key)

        # cache in memory (ok for daily data)
        self._time_series_cache[fname] = data_frame_agg

        return data_frame_agg

    def create_category_key(self, market_data_request, ticker=None):
        """Returns a category key for the associated MarketDataRequest.

        Key shape: environment.category.source.freq.cut[.ticker]

        Returns
        -------
        str
        """
        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None:
            cut = market_data_request.cut

        if ticker is not None:
            key = environment + "." + category + '.' + source + '.' + \
                freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' + \
                freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        """Returns the on-disk cache path for a given cache key."""
        return DataConstants().folder_time_series_data + "/" + filename
def get_data_vendor(self, source):
    """Loads appropriate data service class.

    Parameters
    ----------
    source : str
        the data service to use "bloomberg", "quandl", "yahoo", "google",
        "fred" etc. we can also have forms like "bloomberg-boe" separated
        by hyphens; file paths containing .csv/.h5/.parquet select the
        flat-file vendor

    Returns
    -------
    DataVendor
        vendor instance, or None when the source cannot be determined
    """
    logger = LoggerManager().getLogger(__name__)

    data_vendor = None

    try:
        # "bloomberg-boe" style names: only the prefix selects the vendor
        source = source.split("-")[0]
    except Exception:
        logger.error("Was data source specified?")

        return None

    # imports are local so optional vendor dependencies are only needed
    # when that vendor is actually used
    if source == 'bloomberg':
        try:
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()
        except Exception:
            # Logger.warn is deprecated in favour of Logger.warning
            logger.warning("Bloomberg needs to be installed")
    elif source == 'quandl':
        from findatapy.market.datavendorweb import DataVendorQuandl
        data_vendor = DataVendorQuandl()
    elif source == 'eikon':
        from findatapy.market.datavendorweb import DataVendorEikon
        data_vendor = DataVendorEikon()
    elif source == 'ons':
        from findatapy.market.datavendorweb import DataVendorONS
        data_vendor = DataVendorONS()
    elif source == 'boe':
        from findatapy.market.datavendorweb import DataVendorBOE
        data_vendor = DataVendorBOE()
    elif source == 'dukascopy':
        from findatapy.market.datavendorweb import DataVendorDukasCopy
        data_vendor = DataVendorDukasCopy()
    elif source == 'fxcm':
        from findatapy.market.datavendorweb import DataVendorFXCM
        data_vendor = DataVendorFXCM()
    elif source == 'alfred':
        from findatapy.market.datavendorweb import DataVendorALFRED
        data_vendor = DataVendorALFRED()
    elif source == 'yahoo':
        from findatapy.market.datavendorweb import DataVendorYahoo
        data_vendor = DataVendorYahoo()
    elif source in ['google', 'fred', 'oecd', 'eurostat', 'edgar-index']:
        from findatapy.market.datavendorweb import DataVendorPandasWeb
        data_vendor = DataVendorPandasWeb()
    elif source == 'bitcoincharts':
        from findatapy.market.datavendorweb import DataVendorBitcoincharts
        data_vendor = DataVendorBitcoincharts()
    elif source == 'poloniex':
        from findatapy.market.datavendorweb import DataVendorPoloniex
        data_vendor = DataVendorPoloniex()
    elif source == 'binance':
        from findatapy.market.datavendorweb import DataVendorBinance
        data_vendor = DataVendorBinance()
    elif source == 'bitfinex':
        from findatapy.market.datavendorweb import DataVendorBitfinex
        data_vendor = DataVendorBitfinex()
    elif source == 'gdax':
        from findatapy.market.datavendorweb import DataVendorGdax
        data_vendor = DataVendorGdax()
    elif source == 'kraken':
        from findatapy.market.datavendorweb import DataVendorKraken
        data_vendor = DataVendorKraken()
    elif source == 'bitmex':
        from findatapy.market.datavendorweb import DataVendorBitmex
        data_vendor = DataVendorBitmex()
    elif '.csv' in source or '.h5' in source or '.parquet' in source:
        from findatapy.market.datavendorweb import DataVendorFlatFile
        data_vendor = DataVendorFlatFile()
    elif source == 'alphavantage':
        from findatapy.market.datavendorweb import DataVendorAlphaVantage
        data_vendor = DataVendorAlphaVantage()
    elif source == 'huobi':
        from findatapy.market.datavendorweb import DataVendorHuobi
        data_vendor = DataVendorHuobi()

    # TODO add support for other data sources (like Reuters)

    return data_vendor
def get_data_vendor(self, md_request):
    """Loads appropriate data vendor class.

    Parameters
    ----------
    md_request : MarketDataRequest
        the data_source to use "bloomberg", "quandl", "yahoo", "google",
        "fred" etc. we can also have forms like "bloomberg-boe" separated
        by hyphens; file paths (csv/h5/parquet/zip) or a data_engine
        select the flat-file vendor

    Returns
    -------
    DataVendor
        vendor instance, or None when the source cannot be determined
    """
    logger = LoggerManager().getLogger(__name__)

    data_source = md_request.data_source
    data_engine = md_request.data_engine

    # BUG FIX: previously the unrecognized-source branch fell through to
    # `return data_vendor` with the name unbound (UnboundLocalError);
    # initialise it up front so None is returned instead
    data_vendor = None

    # Special case for files (csv, h5, parquet or zip)
    if ".csv" in str(data_source) or ".h5" in str(data_source) or \
            ".parquet" in str(data_source) or ".zip" in str(data_source) \
            or data_engine is not None:
        from findatapy.market.datavendorweb import DataVendorFlatFile
        data_vendor = DataVendorFlatFile()
    else:
        try:
            # "bloomberg-boe" style names: only the prefix selects the
            # vendor
            data_source = data_source.split("-")[0]
        except Exception:
            logger.error("Was data data_source specified?")

            return None

        # imports are local so optional vendor dependencies are only
        # needed when that vendor is actually used
        if data_source == "bloomberg":
            try:
                from findatapy.market.datavendorbbg import \
                    DataVendorBBGOpen
                data_vendor = DataVendorBBGOpen()
            except Exception:
                # Logger.warn is deprecated in favour of Logger.warning
                logger.warning("Bloomberg needs to be installed")
        elif data_source == "quandl":
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()
        elif data_source == "eikon":
            from findatapy.market.datavendorweb import DataVendorEikon
            data_vendor = DataVendorEikon()
        elif data_source == "ons":
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()
        elif data_source == "boe":
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()
        elif data_source == "dukascopy":
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()
        elif data_source == "fxcm":
            from findatapy.market.datavendorweb import DataVendorFXCM
            data_vendor = DataVendorFXCM()
        elif data_source == "alfred":
            from findatapy.market.datavendorweb import DataVendorALFRED
            data_vendor = DataVendorALFRED()
        elif data_source == "yahoo":
            from findatapy.market.datavendorweb import DataVendorYahoo
            data_vendor = DataVendorYahoo()
        elif data_source in ["google", "fred", "oecd", "eurostat",
                             "edgar-index"]:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()
        elif data_source == "bitcoincharts":
            from findatapy.market.datavendorweb import \
                DataVendorBitcoincharts
            data_vendor = DataVendorBitcoincharts()
        elif data_source == "poloniex":
            from findatapy.market.datavendorweb import DataVendorPoloniex
            data_vendor = DataVendorPoloniex()
        elif data_source == "binance":
            from findatapy.market.datavendorweb import DataVendorBinance
            data_vendor = DataVendorBinance()
        elif data_source == "bitfinex":
            from findatapy.market.datavendorweb import DataVendorBitfinex
            data_vendor = DataVendorBitfinex()
        elif data_source == "gdax":
            from findatapy.market.datavendorweb import DataVendorGdax
            data_vendor = DataVendorGdax()
        elif data_source == "kraken":
            from findatapy.market.datavendorweb import DataVendorKraken
            data_vendor = DataVendorKraken()
        elif data_source == "bitmex":
            from findatapy.market.datavendorweb import DataVendorBitmex
            data_vendor = DataVendorBitmex()
        elif data_source == "alphavantage":
            from findatapy.market.datavendorweb import \
                DataVendorAlphaVantage
            data_vendor = DataVendorAlphaVantage()
        elif data_source == "huobi":
            from findatapy.market.datavendorweb import DataVendorHuobi
            data_vendor = DataVendorHuobi()
        elif data_source in self._data_vendor_dict:
            # user-registered vendors take effect last
            data_vendor = self._data_vendor_dict[data_source]
        else:
            logger.warning(str(data_source) +
                           " is an unrecognized data source")

    return data_vendor
class DataVendor(object):
    """Abstract class for various data source loaders.

    Subclasses implement load_ticker/kill_session; this base class provides
    the translation between findatapy tickers/fields and vendor
    tickers/fields (via ConfigManager or the vendor_* overrides carried on
    the MarketDataRequest).
    """

    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)

        return

    @abc.abstractmethod
    def load_ticker(self, market_data_request):
        """Retrieves market data from external data source

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series
            start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        return  # to be implemented by subclasses

    @abc.abstractmethod
    def kill_session(self):
        # to be implemented by subclasses which hold a session open
        return

    def construct_vendor_market_data_request(self, market_data_request):
        """Creates a MarketDataRequest with the vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series
            start and finish, tickers etc

        Returns
        -------
        MarketDataRequest
        """
        symbols_vendor = self.translate_to_vendor_ticker(market_data_request)
        fields_vendor = self.translate_to_vendor_field(market_data_request)

        market_data_request_vendor = MarketDataRequest(
            md_request=market_data_request)

        market_data_request_vendor.tickers = symbols_vendor
        market_data_request_vendor.fields = fields_vendor

        return market_data_request_vendor

    def translate_to_vendor_field(self, market_data_request):
        """Converts all the fields from findatapy fields to vendor fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series
            start and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # explicit vendor_fields on the request override any configuration
        if market_data_request.vendor_fields is not None:
            return market_data_request.vendor_fields

        source = market_data_request.data_source
        fields_list = market_data_request.fields

        if isinstance(fields_list, str):
            fields_list = [fields_list]

        if self.config is None:
            return fields_list

        fields_converted = []

        for field in fields_list:
            try:
                f = self.config.convert_library_to_vendor_field(
                    source, field)
            except Exception:
                self.logger.warning(
                    "Couldn't find field conversion, did you type it "
                    "correctly: " + field)

                # NOTE(review): aborts and returns None on the first
                # unknown field (pre-existing behaviour, preserved)
                return

            fields_converted.append(f)

        return fields_converted

    # translate findatapy ticker to vendor ticker
    def translate_to_vendor_ticker(self, market_data_request):
        """Converts all the tickers from findatapy tickers to vendor
        tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series
            start and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # explicit vendor_tickers on the request override any configuration
        if market_data_request.vendor_tickers is not None:
            return market_data_request.vendor_tickers

        category = market_data_request.category
        source = market_data_request.data_source
        freq = market_data_request.freq
        cut = market_data_request.cut
        tickers_list = market_data_request.tickers

        if isinstance(tickers_list, str):
            tickers_list = [tickers_list]

        if self.config is None:
            return tickers_list

        tickers_list_converted = []

        for ticker in tickers_list:
            try:
                t = self.config.convert_library_to_vendor_ticker(
                    category, source, freq, cut, ticker)
            except Exception:
                self.logger.error(
                    "Couldn't find ticker conversion, did you type it "
                    "correctly: " + ticker)

                # NOTE(review): aborts and returns None on the first
                # unknown ticker (pre-existing behaviour, preserved)
                return

            tickers_list_converted.append(t)

        return tickers_list_converted

    def translate_from_vendor_field(self, vendor_fields_list,
                                    market_data_request):
        """Converts all the fields from vendors fields to findatapy fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series
            start and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        data_source = market_data_request.data_source

        if isinstance(vendor_fields_list, str):
            vendor_fields_list = [vendor_fields_list]

        # if self.config is None: return vendor_fields_list

        fields_converted = []

        # if we have an explicit vendor field mapping on the request,
        # use it directly
        if market_data_request.vendor_fields is not None:
            dictionary = dict(
                zip(market_data_request.vendor_fields,
                    market_data_request.fields))

            for vendor_field in vendor_fields_list:
                try:
                    fields_converted.append(dictionary[vendor_field])
                except KeyError:
                    # no mapping: pass the vendor field through unchanged
                    fields_converted.append(vendor_field)

        # otherwise used stored configuration files (every field needs to
        # be defined!)
        else:
            for vendor_field in vendor_fields_list:
                try:
                    v = self.config.convert_vendor_to_library_field(
                        data_source, vendor_field)
                except Exception:
                    self.logger.error(
                        "Couldn't find field conversion, did you type it "
                        "correctly: " + vendor_field +
                        ", using 'close' as default.")

                    v = 'close'

                fields_converted.append(v)

        return fields_converted

    # translate findatapy ticker to vendor ticker
    def translate_from_vendor_ticker(self, vendor_tickers_list,
                                     market_data_request):
        """Converts all the fields from vendor tickers to findatapy
        tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series
            start and finish, tickers etc

        Returns
        -------
        List of Strings
        """
        # if we have an explicit vendor ticker mapping on the request,
        # use it directly (raises KeyError on unknown vendor tickers,
        # as before)
        if market_data_request.vendor_tickers is not None:
            dictionary = dict(
                zip(market_data_request.vendor_tickers,
                    market_data_request.tickers))

            tickers_stuff = []

            for vendor_ticker in vendor_tickers_list:
                tickers_stuff.append(dictionary[vendor_ticker])

            return tickers_stuff

        data_source = market_data_request.data_source

        if isinstance(vendor_tickers_list, str):
            vendor_tickers_list = [vendor_tickers_list]

        if self.config is None:
            return vendor_tickers_list

        tickers_converted = []

        for vendor_ticker in vendor_tickers_list:
            try:
                v = self.config.convert_vendor_to_library_ticker(
                    data_source, vendor_ticker)
            except Exception:
                self.logger.error(
                    "Couldn't find ticker conversion, did you type it "
                    "correctly: " + vendor_ticker)

                # NOTE(review): aborts and returns None on the first
                # unknown ticker (pre-existing behaviour, preserved)
                return

            tickers_converted.append(v)

        return tickers_converted