class BacktestRequest(TimeSeriesRequest):
    """Extends TimeSeriesRequest with backtest-specific parameters such as the
    signal name, technical-analysis parameters and spot transaction costs.
    """

    def __init__(self):
        # bug fix: super() must be given the subclass (BacktestRequest), not the
        # base class, otherwise TimeSeriesRequest.__init__ is skipped entirely
        super(BacktestRequest, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        # name of the trading signal being backtested
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        # technical-analysis parameters (TechParams instance)
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert a round-trip cost in basis points into a one-way decimal
        # fraction: halve for one-way, then 100 * 100 converts bp -> decimal
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            # bug fix: string concatenation uses '+', not '&' (which raised TypeError)
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            # bug fix: '+' instead of '&' for string concatenation
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument
class BacktestRequest(TimeSeriesRequest):
    """Extends TimeSeriesRequest with backtest-specific parameters such as the
    signal name, technical-analysis parameters and spot transaction costs.
    """

    def __init__(self):
        super(BacktestRequest, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        # name of the trading signal being backtested
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        # technical-analysis parameters (TechParams instance)
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert a round-trip cost in basis points into a one-way decimal
        # fraction: halve for one-way, then 100 * 100 converts bp -> decimal
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            # bug fix: string concatenation uses '+', not '&' (which raised TypeError)
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            # bug fix: '+' instead of '&' for string concatenation
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument
class TimeSeriesRequest:
    """Describes a market data request to be fulfilled by a data loader.

    Properties
    ----------
    data_source : str
        eg. bloomberg, yahoo, quandl
    start_date, finish_date : str or datetime
        time series window (strings are parsed by date_parser)
    tickers : str or list
        eg. EURUSD
    category : str
        eg. fx, equities, fixed_income, cal_event, fundamental
    freq_mult : int
        frequency multiplier, eg. 1
    freq : str
        'tick', 'intraday' or 'daily'
    gran_freq : str
        granular frequency: tick, minute, hourly, pseudodaily, daily, weekly,
        monthly, quarterly, yearly (also derives freq)
    cut : str
        data cut/close, eg. NYC
    fields : str or list
        eg. open, high, low, close, volume, numEvents
    vendor_tickers, vendor_fields : str or list, optional
        vendor-specific codes for tickers/fields
    cache_algo : str
        eg. internet_load - will forcibly download from the internet
    environment : str
        'prod' or 'backtest' - old data is saved with prod, backtest will
        overwrite the last data point
    """

    def __init__(self, data_source=None, start_date=None, finish_date=None,
                 tickers=None, category=None, freq_mult=None, freq=None,
                 gran_freq=None, cut=None, fields=None, cache_algo=None,
                 vendor_tickers=None, vendor_fields=None, environment=None):
        self.logger = LoggerManager().getLogger(__name__)

        # default frequency multiplier (overridden below if supplied)
        self.freq_mult = 1

        if data_source is not None: self.data_source = data_source
        if start_date is not None: self.start_date = start_date
        if finish_date is not None: self.finish_date = finish_date
        if tickers is not None: self.tickers = tickers
        if category is not None: self.category = category
        if freq_mult is not None: self.freq_mult = freq_mult
        if freq is not None: self.freq = freq
        # bug fix: the gran_freq parameter was previously accepted but ignored
        if gran_freq is not None: self.gran_freq = gran_freq
        if cut is not None: self.cut = cut
        if fields is not None: self.fields = fields
        if cache_algo is not None: self.cache_algo = cache_algo
        if vendor_tickers is not None: self.vendor_tickers = vendor_tickers
        if vendor_fields is not None: self.vendor_fields = vendor_fields
        if environment is not None: self.environment = environment

    @property
    def data_source(self):
        return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        valid_data_source = ['ats', 'bloomberg', 'dukascopy', 'gain', 'quandl', 'yahoo']

        if data_source not in valid_data_source:
            # bug fix: '+' instead of '&' for string concatenation (was TypeError)
            self.logger.warning(data_source + " is not a defined data source.")

        self.__data_source = data_source

    @property
    def category(self):
        return self.__category

    @category.setter
    def category(self, category):
        self.__category = category

    @property
    def tickers(self):
        return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        # always store tickers as a list, even when a single string is given
        if not isinstance(tickers, list):
            tickers = [tickers]

        self.__tickers = tickers

    @property
    def fields(self):
        return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if field_entry not in valid_fields:
                i = 0
                # self.logger.warning(field_entry + " is not a valid field.")

        # add error checking

        self.__fields = fields

    @property
    def vendor_tickers(self):
        return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        # bug fix: typo 'vednor_tickers' meant a single ticker was never
        # wrapped in a list before being stored
        if not isinstance(vendor_tickers, list):
            vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self):
        return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if not isinstance(vendor_fields, list):
            vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self):
        return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = ['tick', 'intraday', 'daily']

        if freq not in valid_freq:
            # bug fix: '+' instead of '&' for string concatenation
            self.logger.warning(freq + " is not a defined frequency")

        self.__freq = freq

    @property
    def gran_freq(self):
        return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        gran_freq = gran_freq.lower()

        valid_gran_freq = ['tick', 'minute', 'hourly', 'pseudodaily', 'daily',
                           'weekly', 'monthly', 'quarterly', 'yearly']

        if gran_freq not in valid_gran_freq:
            # bug fix: '+' instead of '&' for string concatenation
            self.logger.warning(gran_freq + " is not a defined frequency")

        # derive the coarse frequency bucket from the granular frequency
        if gran_freq in ['minute', 'hourly']:
            self.__freq = 'intraday'
        elif gran_freq in ['tick']:
            self.__freq = 'tick'
        else:
            self.__freq = 'daily'

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self):
        return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult):
        self.__freq_mult = freq_mult

    @property
    def start_date(self):
        return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self):
        return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self):
        return self.__cut

    @cut.setter
    def cut(self, cut):
        self.__cut = cut

    def date_parser(self, date):
        """Parse a date string into a datetime, trying several known formats.

        Non-string inputs (eg. already a datetime) are returned unchanged; a
        string that matches none of the formats is also returned unchanged,
        matching the previous best-effort behaviour.
        """
        if isinstance(date, str):
            # formats expected eg. 'Jun 1 2005 01:33', '1 Jun 2005 01:33',
            # 'Jun 1 2005', '1 Jun 2005'
            for date_format in ['%b %d %Y %H:%M', '%d %b %Y %H:%M',
                                '%b %d %Y', '%d %b %Y']:
                try:
                    return datetime.strptime(date, date_format)
                except ValueError:
                    continue

        return date

    @property
    def cache_algo(self):
        return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = ['internet_load', 'internet_load_return',
                            'cache_algo', 'cache_algo_return']

        if cache_algo not in valid_cache_algo:
            self.logger.warning(cache_algo + " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self):
        return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment = ['prod', 'backtest']

        if environment not in valid_environment:
            # bug fix: '+' instead of '&' for string concatenation
            self.logger.warning(environment + " is not a defined environment.")

        self.__environment = environment
class BacktestRequest(TimeSeriesRequest):
    """Extends TimeSeriesRequest with backtest-specific parameters: transaction
    costs for spot and options, asset/instrument selection and option details
    (tenor, strike, delta threshold).
    """

    def __init__(self):
        # bug fix: super() must be given the subclass (BacktestRequest), not the
        # base class, otherwise TimeSeriesRequest.__init__ is skipped entirely
        super(BacktestRequest, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    @property
    def opt_tc_bp(self):
        return self.__opt_tc_bp

    @opt_tc_bp.setter
    def opt_tc_bp(self, opt_tc_bp):
        # convert a round-trip cost in basis points into a one-way decimal
        # fraction: halve for one-way, then 100 * 100 converts bp -> decimal
        self.__opt_tc_bp = opt_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # same bp -> one-way decimal conversion as opt_tc_bp
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            # bug fix: string concatenation uses '+', not '&' (which raised TypeError)
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            # bug fix: '+' instead of '&' for string concatenation
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def tenor(self):
        return self.__tenor

    @tenor.setter
    def tenor(self, tenor):
        self.__tenor = tenor

    @property
    def strike(self):
        return self.__strike

    # bug fix: the strike setter was decorated with @tenor.setter, which made
    # reading obj.strike return the tenor value instead of the stored strike
    @strike.setter
    def strike(self, strike):
        self.__strike = strike

    @property
    def delta_threshold(self):
        return self.__delta_threshold

    @delta_threshold.setter
    def delta_threshold(self, delta_threshold):
        self.__delta_threshold = delta_threshold
class LoaderDukasCopy(LoaderTemplate):
    """Downloads FX tick data from the Dukascopy public feed, hour by hour,
    and assembles it into a bid/ask DataFrame.
    """

    # URL template for one hour of LZMA-compressed binary tick data
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        # bug fix: super() must be given the subclass (LoaderDukasCopy), not
        # the base class, otherwise LoaderTemplate.__init__ is never called
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        # silence per-request chatter from the requests library
        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """load_ticker - Retrieves tick market data from Dukascopy

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and
            finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in [
                'daily', 'weekly', 'monthly', 'quarterly', 'yearly',
                'intraday', 'minute', 'hourly']):
            self.logger.warning("Dukascopy loader is for tick data only")
            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            # time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]
            data_frame = self.get_tick(time_series_request, time_series_request_vendor)

            if data_frame is not None:
                # bug fix: tz_localize returns a new frame; the result was
                # previously discarded, leaving the index timezone-naive
                data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        """No persistent session is held for Dukascopy, so nothing to tear down."""
        return

    def get_tick(self, time_series_request, time_series_request_vendor):
        """Download ticks and relabel columns from vendor to internal
        ticker.field names."""
        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]] * len(returned_fields)

            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            # combine as "ticker.field" column labels
            data_frame.columns = [t + "." + f for t, f in zip(tickers, fields)]
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):
        """Download every hour in the request window and concatenate the
        per-hour DataFrames (returns None if nothing was retrieved)."""
        symbol = time_series_request.tickers[0]

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [self.fetch_file(time, symbol)
                   for time in self.hour_range(time_series_request.start_date,
                                               time_series_request.finish_date)]

        # parallel (has pickle issues)
        # time_list = self.hour_range(time_series_request.start_date, time_series_request.finish_date)
        # df_list = Parallel(n_jobs=-1)(delayed(self.fetch_file)(time, symbol) for time in time_list)

        try:
            return pandas.concat(df_list)
        except Exception:
            # nothing downloaded (or every hour failed) - best effort
            return None

    def fetch_file(self, time, symbol):
        """Download, optionally cache to disk, decompress and parse one hour
        of tick data; returns None on any failure (best effort)."""
        # log progress once per day (at midnight)
        if time.hour == 0:
            self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(symbol=symbol,
                                          year=str(time.year).rjust(4, '0'),
                                          month=str(time.month).rjust(2, '0'),
                                          day=str(time.day).rjust(2, '0'),
                                          hour=str(time.hour).rjust(2, '0'))

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants().temp_pythalesians_folder + "/dkticks/" + tick_path

            # only write files that are not already cached on disk
            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

                self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            # failed download (tick is None) or corrupt/empty hour - skip it
            return None

    def fetch_tick(self, tick_url):
        """Download one raw compressed tick file, retrying up to 5 times.

        Returns the response bytes, or None if every attempt failed.
        """
        tick_request = None

        # try up to 5 times to download
        for _ in range(5):
            try:
                tick_request = requests.get(tick_url)
                break
            except Exception:
                continue

        if tick_request is None:
            # bug fix: the logger object is not callable; use logger.error
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        """Write raw tick bytes to disk."""
        # context manager guarantees the file handle is closed on error
        with open(out_path, "wb+") as data_file:
            data_file.write(content)

    def chunks(self, list, n):
        """Split list into consecutive chunks of length n (last may be shorter).

        NOTE(review): the parameter shadows the builtin 'list'; kept for
        backward compatibility with keyword callers.
        """
        if n < 1:
            n = 1

        return [list[i:i + n] for i in range(0, len(list), n)]

    def retrieve_df(self, data, symbol, epoch):
        """Parse decompressed binary tick data into a bid/ask DataFrame."""
        date, ticks = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(
            data, epoch)

        df = pandas.DataFrame(data=ticks,
                              columns=['temp', 'bid', 'ask', 'bidv', 'askv'],
                              index=date)
        # bug fix: DataFrame.drop returns a new frame; the result was
        # previously discarded, so the 'temp' column leaked into the output
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        # prices are returned without decimal point
        divisor = 100000

        # where JPY is the terms currency we have different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        """Yield hourly datetimes covering [start_date, end_date)."""
        delta_t = end_date - start_date
        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)

        for n in range(int(delta_hours)):
            # keyword form is clearer than timedelta(0, 0, 0, 0, 0, n)
            yield start_date + timedelta(hours=n)

    def get_daily_data(self):
        # daily data is not supported by this loader (tick only)
        pass
class LoaderDukasCopy(LoaderTemplate):
    """Downloads FX tick data from the Dukascopy public feed, hour by hour,
    and assembles it into a bid/ask DataFrame.
    """

    # URL template for one hour of LZMA-compressed binary tick data
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        # bug fix: super() must be given the subclass (LoaderDukasCopy), not
        # the base class, otherwise LoaderTemplate.__init__ is never called
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        # silence per-request chatter from the requests library
        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """load_ticker - Retrieves tick market data from Dukascopy

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and
            finish, tickers etc

        Returns
        -------
        DataFrame
        """
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in [
                'daily', 'weekly', 'monthly', 'quarterly', 'yearly',
                'intraday', 'minute', 'hourly']):
            self.logger.warning("Dukascopy loader is for tick data only")
            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            # time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]
            data_frame = self.get_tick(time_series_request, time_series_request_vendor)

            if data_frame is not None:
                # bug fix: tz_localize returns a new frame; the result was
                # previously discarded, leaving the index timezone-naive
                data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        """No persistent session is held for Dukascopy, so nothing to tear down."""
        return

    def get_tick(self, time_series_request, time_series_request_vendor):
        """Download ticks and relabel columns from vendor to internal
        ticker.field names."""
        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]] * len(returned_fields)

            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            # combine as "ticker.field" column labels
            data_frame.columns = [t + "." + f for t, f in zip(tickers, fields)]
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):
        """Download every hour in the request window and concatenate the
        per-hour DataFrames (returns None if nothing was retrieved)."""
        symbol = time_series_request.tickers[0]

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [self.fetch_file(time, symbol)
                   for time in self.hour_range(time_series_request.start_date,
                                               time_series_request.finish_date)]

        # parallel (has pickle issues)
        # time_list = self.hour_range(time_series_request.start_date, time_series_request.finish_date)
        # df_list = Parallel(n_jobs=-1)(delayed(self.fetch_file)(time, symbol) for time in time_list)

        try:
            return pandas.concat(df_list)
        except Exception:
            # nothing downloaded (or every hour failed) - best effort
            return None

    def fetch_file(self, time, symbol):
        """Download, optionally cache to disk, decompress and parse one hour
        of tick data; returns None on any failure (best effort)."""
        # log progress once per day (at midnight)
        if time.hour == 0:
            self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(symbol=symbol,
                                          year=str(time.year).rjust(4, '0'),
                                          month=str(time.month).rjust(2, '0'),
                                          day=str(time.day).rjust(2, '0'),
                                          hour=str(time.hour).rjust(2, '0'))

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants().temp_pythalesians_folder + "/dkticks/" + tick_path

            # only write files that are not already cached on disk
            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

                self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            # failed download (tick is None) or corrupt/empty hour - skip it
            return None

    def fetch_tick(self, tick_url):
        """Download one raw compressed tick file, retrying up to 5 times.

        Returns the response bytes, or None if every attempt failed.
        """
        tick_request = None

        # try up to 5 times to download
        for _ in range(5):
            try:
                tick_request = requests.get(tick_url)
                break
            except Exception:
                continue

        if tick_request is None:
            # bug fix: the logger object is not callable; use logger.error
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        """Write raw tick bytes to disk."""
        # context manager guarantees the file handle is closed on error
        with open(out_path, "wb+") as data_file:
            data_file.write(content)

    def chunks(self, list, n):
        """Split list into consecutive chunks of length n (last may be shorter).

        NOTE(review): the parameter shadows the builtin 'list'; kept for
        backward compatibility with keyword callers.
        """
        if n < 1:
            n = 1

        return [list[i:i + n] for i in range(0, len(list), n)]

    def retrieve_df(self, data, symbol, epoch):
        """Parse decompressed binary tick data into a bid/ask DataFrame."""
        date, ticks = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(
            data, epoch)

        df = pandas.DataFrame(data=ticks,
                              columns=['temp', 'bid', 'ask', 'bidv', 'askv'],
                              index=date)
        # bug fix: DataFrame.drop returns a new frame; the result was
        # previously discarded, so the 'temp' column leaked into the output
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        # prices are returned without decimal point
        divisor = 100000

        # where JPY is the terms currency we have different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        """Yield hourly datetimes covering [start_date, end_date)."""
        delta_t = end_date - start_date
        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)

        for n in range(int(delta_hours)):
            # keyword form is clearer than timedelta(0, 0, 0, 0, 0, n)
            yield start_date + timedelta(hours=n)

    def get_daily_data(self):
        # daily data is not supported by this loader (tick only)
        pass