def fetch_file(self, time, symbol):
    """Download one hourly Dukascopy tick file and parse it into a DataFrame.

    Parameters
    ----------
    time : datetime-like
        Only the ``year``, ``month``, ``day`` and ``hour`` attributes are read;
        they are zero-padded and substituted into ``self.tick_name``.
    symbol : str
        Symbol substituted into ``self.tick_name``.

    Returns
    -------
    object or None
        Whatever ``self.retrieve_df`` returns for the decompressed payload, or
        None when the payload cannot be decompressed/parsed.
    """
    # only log once per day (at hour 0) to keep the output readable
    if time.hour % 24 == 0:
        self.logger.info("Downloading... " + str(time))

    tick_path = self.tick_name.format(
        symbol=symbol,
        year=str(time.year).rjust(4, '0'),
        month=str(time.month).rjust(2, '0'),
        day=str(time.day).rjust(2, '0'),
        hour=str(time.hour).rjust(2, '0')
    )

    tick = self.fetch_tick(DataConstants().dukascopy_base_url + tick_path)

    if DataConstants().dukascopy_write_temp_tick_disk:
        out_path = DataConstants().temp_folder + "/dkticks/" + tick_path

        # exist_ok avoids the check-then-create race when several downloads
        # create the same folder at the same time
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

        self.write_tick(tick, out_path)

    try:
        return self.retrieve_df(lzma.decompress(tick), symbol, time)
    except Exception:
        # best effort: an unusable payload yields None, but KeyboardInterrupt
        # and SystemExit are no longer swallowed
        return None
def __init__(self, market_data_generator=None):
    """Load the economic ticker reference tables and set the data generator.

    Parameters
    ----------
    market_data_generator : object, optional
        Generator stored on ``self.market_data_generator``. When None, a new
        ``MarketDataGenerator`` is created.
    """
    self.logger = LoggerManager().getLogger(__name__)

    # reference tables are read from the CSV locations held in DataConstants
    all_tickers_csv = DataConstants().all_econ_tickers
    self._all_econ_tickers = pandas.read_csv(all_tickers_csv)

    country_codes_csv = DataConstants().econ_country_codes
    self._econ_country_codes = pandas.read_csv(country_codes_csv)

    country_groups_csv = DataConstants().econ_country_groups
    self._econ_country_groups = pandas.read_csv(country_groups_csv)

    self.market_data_generator = (
        MarketDataGenerator() if market_data_generator is None
        else market_data_generator
    )
def __init__(self, market_data_generator=None, md_request=None):
    """Store the market data generator and the default request.

    Parameters
    ----------
    market_data_generator : object, optional
        Generator used to fetch data. When None, one is created according to
        ``DataConstants().default_market_data_generator``; if that setting
        matches neither known name the attribute stays None.
    md_request : object, optional
        Stored unchanged on ``self.md_request``.
    """
    self.md_request = md_request

    if market_data_generator is None:
        generator_name = DataConstants().default_market_data_generator

        if generator_name == "marketdatagenerator":
            from findatapy.market import MarketDataGenerator

            market_data_generator = MarketDataGenerator()
        elif generator_name == 'cachedmarketdatagenerator':
            # NOT CURRENTLY IMPLEMENTED FOR FUTURE USE
            from finaddpy.market import CachedMarketDataGenerator

            market_data_generator = CachedMarketDataGenerator()

    self.market_data_generator = market_data_generator
def download_daily(self, market_data_request, data_vendor):
    """Load daily time series from the specified data provider.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.
    data_vendor : object
        Vendor whose ``load_ticker`` is called directly when exactly one
        thread is configured.

    Returns
    -------
    pandas.DataFrame
    """
    thread_no = DataConstants().market_thread_no['other']

    # daily data does not include ticker in the key, as multiple tickers in the same file
    if thread_no == 1:
        data_frame_agg = data_vendor.load_ticker(market_data_request)
    else:
        tickers = market_data_request.tickers
        vendor_tickers = market_data_request.vendor_tickers

        # A thread count of 0 used to raise ZeroDivisionError here; fall back
        # to one ticker per request, which is also what a negative count
        # already produced. The group size is never allowed below 1,
        # otherwise range() below would reject the step.
        if thread_no > 0:
            group_size = max(int(len(tickers) / thread_no - 1), 1)
        else:
            group_size = 1

        # split up tickers into groups related to number of threads to call
        market_data_request_list = []

        for i in range(0, len(tickers), group_size):
            market_data_request_single = copy.copy(market_data_request)
            market_data_request_single.tickers = tickers[i:i + group_size]

            if vendor_tickers is not None:
                market_data_request_single.vendor_tickers = \
                    vendor_tickers[i:i + group_size]

            market_data_request_list.append(market_data_request_single)

        data_frame_agg = self.fetch_group_time_series(market_data_request_list)

    key = self.create_category_key(market_data_request)
    fname = self.create_cache_file_name(key)

    # cache in memory (ok for daily data)
    self._time_series_cache[fname] = data_frame_agg

    return data_frame_agg
def fetch_group_time_series(self, market_data_request_list):
    """Fetch several requests, optionally in a pool, and outer-join the results.

    Parameters
    ----------
    market_data_request_list : list
        Requests passed one by one to ``self.fetch_single_time_series``. The
        ``data_source`` of the first request selects the thread count.

    Returns
    -------
    object or None
        Result of ``self.calculations.pandas_outer_join`` over the non-None
        results. None when the list is empty, when nothing could be fetched
        as a group, or when the join raises (a warning is logged).
    """
    # nothing to fetch; previously this raised IndexError on [0] below
    if not market_data_request_list:
        return None

    data_frame_agg = None

    thread_no = DataConstants().market_thread_no['other']
    data_source = market_data_request_list[0].data_source

    if data_source in DataConstants().market_thread_no:
        thread_no = DataConstants().market_thread_no[data_source]

    if thread_no > 0:
        pool = SwimPool().create_pool(
            thread_technique=DataConstants().market_thread_technique,
            thread_no=thread_no)

        try:
            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()
        finally:
            # release the workers even when a download raised
            pool.close()
            pool.join()
    else:
        data_frame_group = [self.fetch_single_time_series(md_request)
                            for md_request in market_data_request_list]

    # collect together all the time series
    if data_frame_group is not None:
        data_frame_group = [i for i in data_frame_group if i is not None]

        try:
            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_group)
        except Exception as e:
            self.logger.warning(
                'Possible overlap of columns? Have you specifed same ticker several times: ' + str(e))

    return data_frame_agg
def __init__(self, multiprocessing_library=None):
    """Record which multiprocessing library to use and call its freeze_support.

    Parameters
    ----------
    multiprocessing_library : str, optional
        One of 'multiprocess', 'multiprocessing_on_dill' or 'multiprocessing'.
        When None, ``DataConstants().multiprocessing_library`` is used. Any
        other value is stored but triggers no import.
    """
    self._pool = None

    if multiprocessing_library is None:
        multiprocessing_library = DataConstants().multiprocessing_library

    self._multiprocessing_library = multiprocessing_library
    self._thread_technique = 'na'

    # The imports stay as literal statements (not importlib) so that tools
    # which scan for imports can still see them.
    # Only Exception is caught: a missing library stays a silent no-op, but
    # SystemExit (which freeze_support() raises to end a frozen child
    # process) and KeyboardInterrupt are no longer swallowed.
    if multiprocessing_library == 'multiprocess':
        try:
            import multiprocess
            multiprocess.freeze_support()
        except Exception:
            pass
    elif multiprocessing_library == 'multiprocessing_on_dill':
        try:
            import multiprocessing_on_dill
            multiprocessing_on_dill.freeze_support()
        except Exception:
            pass
    elif multiprocessing_library == 'multiprocessing':
        try:
            import multiprocessing
            multiprocessing.freeze_support()
        except Exception:
            pass
def fetch_group_time_series(self, market_data_request_list):
    """Fetch several requests, optionally in a pool, and outer-join the results.

    Parameters
    ----------
    market_data_request_list : list
        Requests passed one by one to ``self.fetch_single_time_series``. The
        ``data_source`` of the first request selects the thread count.

    Returns
    -------
    object or None
        Result of ``self.calculations.pandas_outer_join`` over the non-None
        results, or None when the list is empty.
    """
    # nothing to fetch; previously this raised IndexError on [0] below
    if not market_data_request_list:
        return None

    data_frame_agg = None

    thread_no = DataConstants().market_thread_no['other']
    data_source = market_data_request_list[0].data_source

    if data_source in DataConstants().market_thread_no:
        thread_no = DataConstants().market_thread_no[data_source]

    if thread_no > 0:
        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        # (compared with ==; the previous `is "thread"` tested object identity)
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spend waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        pool = Pool(thread_no)

        try:
            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()
        finally:
            # release the workers even when a download raised
            pool.close()
            pool.join()
    else:
        data_frame_group = [self.fetch_single_time_series(md_request)
                            for md_request in market_data_request_list]

    # collect together all the time series
    if data_frame_group is not None:
        data_frame_group = [i for i in data_frame_group if i is not None]

        data_frame_agg = self.calculations.pandas_outer_join(
            data_frame_group)

    return data_frame_agg
def download_daily(self, market_data_request):
    """Download daily data from Quandl, retrying up to five times.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        Its ``tickers``, ``start_date`` and ``finish_date`` are passed to
        ``Quandl.get``.

    Returns
    -------
    object or None
        Whatever ``Quandl.get`` returns, or None if every attempt failed.
    """
    max_trials = 5
    data_frame = None

    for trial in range(1, max_trials + 1):
        try:
            data_frame = Quandl.get(market_data_request.tickers,
                                    authtoken=DataConstants().quandl_api_key,
                                    trim_start=market_data_request.start_date,
                                    trim_end=market_data_request.finish_date)
            break
        except Exception:
            # Exception only: Ctrl-C / SystemExit must stop the loop rather
            # than count as a failed attempt
            self.logger.info("Attempting... " + str(trial) + " request to download from Quandl")
    else:
        # the loop finished without a break, i.e. every attempt failed
        self.logger.error("Couldn't download from Quandl after several attempts!")

    return data_frame
# # Copyright 2016 Cuemacro # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the # License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # # See the License for the specific language governing permissions and limitations under the License. # from findatapy.util import DataConstants from findatapy.market.ioengine import SpeedCache constants = DataConstants() # from deco import * class Market(object): """Higher level class which fetches market data using underlying classes such as MarketDataGenerator. Also contains several other classes, which are for asset specific instances, for example for generating FX spot time series or FX volatility surfaces. """ def __init__(self, market_data_generator=None, md_request=None): if market_data_generator is None: if constants.default_market_data_generator == "marketdatagenerator": from findatapy.market import MarketDataGenerator market_data_generator = MarketDataGenerator()
def download_daily(self, market_data_request):
    """Download daily series (and release metadata) from ALFRED/FRED.

    Acceptable fields: close, first-revision, actual-release,
    release-date-time-full. Each ticker gets its own budget of five attempts.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        Its ``tickers``, ``fields``, ``start_date`` and ``finish_date`` are
        read.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of the outer-joined value frames and the
        outer-joined release-date frames.
    """
    max_trials = 5

    fields = market_data_request.fields
    start_date = market_data_request.start_date
    finish_date = market_data_request.finish_date

    def filter_by_date(frame):
        # restrict a frame to the requested date window
        return Filter().filter_time_series_by_date(start_date, finish_date, frame)

    data_frame_list = []
    data_frame_release = []

    for ticker in market_data_request.tickers:
        release_col = ticker + '.release-date-time-full'

        # The attempt counter restarts for every ticker. Previously it was
        # shared, so one failing ticker used up the retries of all later ones.
        for trial in range(1, max_trials + 1):
            # Collected per attempt and only merged on success, so a retry
            # after a partial failure cannot append duplicate frames.
            ticker_frames = []
            ticker_releases = []

            try:
                fred = Fred(api_key=DataConstants().fred_api_key)

                if 'close' in fields and 'release-date-time-full' in fields:
                    data_frame = fred.get_series_all_releases(
                        ticker, observation_start=start_date, observation_end=finish_date)

                    data_frame.columns = ['Date', release_col, ticker + '.close']

                    # keep the latest release for each observation date
                    data_frame = data_frame.sort_values(by=['Date', release_col])
                    data_frame = data_frame.drop_duplicates(subset=['Date'], keep='last')
                    data_frame = data_frame.set_index(['Date'])

                    ticker_frames.append(filter_by_date(data_frame))
                elif 'close' in fields:
                    data_frame = pandas.DataFrame(fred.get_series(
                        series_id=ticker, observation_start=start_date, observation_end=finish_date))

                    data_frame.columns = [ticker + '.close']
                    ticker_frames.append(data_frame)

                if 'first-revision' in fields:
                    data_frame = pandas.DataFrame(fred.get_series_first_revision(
                        ticker, observation_start=start_date, observation_end=finish_date))

                    data_frame.columns = [ticker + '.first-revision']
                    ticker_frames.append(filter_by_date(data_frame))

                if 'actual-release' in fields and 'release-date-time-full' in fields:
                    data_frame = fred.get_series_all_releases(
                        ticker, observation_start=start_date, observation_end=finish_date)

                    data_frame.columns = ['Date', release_col, ticker + '.actual-release']

                    # keep the earliest release for each observation date
                    data_frame = data_frame.sort_values(by=['Date', release_col])
                    data_frame = data_frame.drop_duplicates(subset=['Date'], keep='first')
                    data_frame = data_frame.set_index(['Date'])

                    ticker_frames.append(filter_by_date(data_frame))
                elif 'actual-release' in fields:
                    data_frame = pandas.DataFrame(fred.get_series_first_release(
                        ticker, observation_start=start_date, observation_end=finish_date))

                    data_frame.columns = [ticker + '.actual-release']
                    ticker_frames.append(filter_by_date(data_frame))
                elif 'release-date-time-full' in fields:
                    data_frame = fred.get_series_all_releases(
                        ticker, observation_start=start_date, observation_end=finish_date)

                    data_frame = pandas.DataFrame(data_frame['realtime_start'])
                    data_frame.columns = [release_col]
                    data_frame.index = data_frame[release_col]

                    # DataFrame.sort() no longer exists in pandas; with no
                    # arguments it sorted by index, which is sort_index()
                    data_frame = data_frame.sort_index()
                    data_frame = data_frame.drop_duplicates()

                    ticker_releases.append(filter_by_date(data_frame))
            except Exception:
                self.logger.info("Attempting... " + str(trial) + " request to download from ALFRED/FRED")
            else:
                data_frame_list.extend(ticker_frames)
                data_frame_release.extend(ticker_releases)
                break
        else:
            # the loop finished without a break, i.e. every attempt failed
            self.logger.error("Couldn't download from ALFRED/FRED after several attempts!")

    calc = Calculations()

    data_frame1 = calc.pandas_outer_join(data_frame_list)
    data_frame2 = calc.pandas_outer_join(data_frame_release)

    data_frame = pandas.concat([data_frame1, data_frame2], axis=1)

    return data_frame
def get_fx_cross(self, start, end, cross, cut="NYC", source="bloomberg", freq="intraday",
                 cache_algo='internet_load_return', type='spot', environment='backtest', fields=['close']):
    """Fetch one or more FX crosses and outer-join them into a single frame.

    Tick requests (``freq == 'tick'`` or source 'gain'/'dukascopy') are
    delegated to ``self.get_fx_cross_tick``.

    Parameters
    ----------
    start, end
        Start and finish dates put on every request.
    cross : str or list of str
        A single cross or several; a string is wrapped in a list.
    cut, source, freq, cache_algo, environment
        Copied onto each ``MarketDataRequest``.
    type : str
        Stored as ``type`` on each request (tick requests always pass 'spot').
    fields : list
        Only forwarded to ``get_fx_cross_tick``; non-tick requests always ask
        for ['close']. The default list is never mutated here.

    Returns
    -------
    object
        For non-tick requests, the joined result after ``dropna()``.
    """
    if source == "gain" or source == 'dukascopy' or freq == 'tick':
        return self.get_fx_cross_tick(start, end, cross, cut=cut, source=source,
                                      cache_algo=cache_algo, type='spot', fields=fields)

    if isinstance(cross, str):
        cross = [cross]

    market_data_request_list = []

    for cr in cross:
        market_data_request = MarketDataRequest(freq_mult=1,
                                                cut=cut,
                                                fields=['close'],
                                                freq=freq,
                                                cache_algo=cache_algo,
                                                start_date=start,
                                                finish_date=end,
                                                data_source=source,
                                                environment=environment)

        market_data_request.type = type
        market_data_request.cross = cr

        if freq == 'intraday':
            market_data_request.gran_freq = "minute"  # intraday
        elif freq == 'daily':
            market_data_request.gran_freq = "daily"  # daily

        market_data_request_list.append(market_data_request)

    thread_no = DataConstants().market_thread_no['other']

    if market_data_request_list[0].data_source in DataConstants().market_thread_no:
        thread_no = DataConstants().market_thread_no[
            market_data_request_list[0].data_source]

    # fudge, issue with multithreading and accessing HDF5 files
    # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
    #    thread_no = 0

    if thread_no > 0:
        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        # (compared with ==; the previous `is "thread"` tested object identity)
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spend waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self._get_individual_fx_cross,
                                market_data_request_list)
        data_frame_agg = self.calculations.iterative_outer_join(result.get())

        # NOTE(review): the pool is deliberately not closed/joined here, as in
        # the original (its cleanup was commented out with the remark "pool
        # would have already been closed earlier") - confirm this still holds.
    else:
        data_frame_agg = [self._get_individual_fx_cross(md_request)
                          for md_request in market_data_request_list]

        data_frame_agg = self.calculations.pandas_outer_join(data_frame_agg)

    # strip the nan elements
    data_frame_agg = data_frame_agg.dropna()

    return data_frame_agg
def create_cache_file_name(self, filename):
    """Return ``filename`` prefixed with the configured time series data folder.

    Parameters
    ----------
    filename : str
        Name appended after the folder and a "/" separator.

    Returns
    -------
    str
    """
    folder = DataConstants().folder_time_series_data

    return "/".join((folder, filename))
def download_daily(self, market_data_request):
    """Loads daily time series from specified data provider

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched, including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    key = MarketDataRequest().create_category_key(market_data_request)

    # categories listed here are fetched group by group, one after the other
    is_key_overriden = any(
        k in key
        for k in DataConstants().override_multi_threading_for_categories)

    # by default use other
    thread_no = DataConstants().market_thread_no['other']

    if market_data_request.data_source in DataConstants().market_thread_no:
        thread_no = DataConstants().market_thread_no[
            market_data_request.data_source]

    # daily data does not include ticker in the key, as multiple tickers in the same file
    if thread_no == 1:
        return self.fetch_single_time_series(market_data_request)

    tickers = market_data_request.tickers
    vendor_tickers = market_data_request.vendor_tickers

    # A thread count of 0 used to raise ZeroDivisionError here; fall back to
    # one ticker per request, which is also what a negative count already
    # produced. The group size is never allowed below 1, otherwise range()
    # below would reject the step.
    if thread_no > 0:
        group_size = max(int(len(tickers) / thread_no - 1), 1)
    else:
        group_size = 1

    # split up tickers into groups related to number of threads to call
    market_data_request_list = []

    for i in range(0, len(tickers), group_size):
        market_data_request_single = copy.copy(market_data_request)
        market_data_request_single.tickers = tickers[i:i + group_size]

        if vendor_tickers is not None:
            market_data_request_single.vendor_tickers = \
                vendor_tickers[i:i + group_size]

        market_data_request_list.append(market_data_request_single)

    # special case where we make smaller calls one after the other
    if is_key_overriden:
        data_frame_list = [self.fetch_single_time_series(md)
                           for md in market_data_request_list]

        return self.calculations.pandas_outer_join(data_frame_list)

    return self.fetch_group_time_series(market_data_request_list)
def download_intraday_tick(self, market_data_request):
    """Loads intraday time series from specified data provider

    One request is built per ticker. With a single ticker, or when the
    'other' thread count is 1, the requests are fetched one by one and
    outer-joined here; otherwise they go to ``self.fetch_group_time_series``.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched, including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    calculations = Calculations()

    tickers = market_data_request.tickers
    vendor_tickers = market_data_request.vendor_tickers

    single_threaded = len(tickers) == 1 \
        or DataConstants().market_thread_no['other'] == 1

    # create a list of MarketDataRequests, one per ticker
    market_data_request_list = []

    for ticker_cycle, ticker in enumerate(tickers):
        market_data_request_single = copy.copy(market_data_request)
        market_data_request_single.tickers = ticker

        if vendor_tickers is not None:
            market_data_request_single.vendor_tickers = [
                vendor_tickers[ticker_cycle]]

        market_data_request_list.append(market_data_request_single)

    if not single_threaded:
        return self.fetch_group_time_series(market_data_request_list)

    # single threaded version
    # handle intraday ticker calls separately one by one
    data_frame_group = []

    for market_data_request_single in market_data_request_list:
        # Fetch the per-ticker request. Passing the full market_data_request
        # here (as before) re-downloaded every ticker on each iteration.
        data_frame_single = self.fetch_single_time_series(
            market_data_request_single)

        # if the vendor doesn't provide any data, don't attempt to append
        if data_frame_single is not None and not data_frame_single.empty:
            data_frame_single.index.name = 'Date'

            # we downscale into float32, to avoid memory problems in Python (32 bit)
            # data is stored on disk as float32 anyway
            data_frame_single = data_frame_single.astype('float32')

            data_frame_group.append(data_frame_single)

    # if you call for returning multiple tickers, be careful with memory considerations!
    return calculations.pandas_outer_join(data_frame_group)
def auto_set_key(self):
    """Create the Twython client from the credentials held in DataConstants.

    The client is stored on ``self.twitter``.
    """
    # read in the same order as the positional arguments expected by Twython
    app_key = DataConstants().APP_KEY
    app_secret = DataConstants().APP_SECRET
    oauth_token = DataConstants().OAUTH_TOKEN
    oauth_token_secret = DataConstants().OAUTH_TOKEN_SECRET

    self.twitter = Twython(app_key, app_secret, oauth_token, oauth_token_secret)