Example #1
0
class BacktestRequest(TimeSeriesRequest):
    """Time series request augmented with backtesting parameters
    (trading signal name, technical parameters, transaction costs, asset
    and instrument selection)."""

    def __init__(self):
        # fix: super() must name this class (was TimeSeriesRequest), otherwise
        # TimeSeriesRequest.__init__ is skipped in the MRO
        super(BacktestRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        """Name of the trading signal used in the backtest."""
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        """Technical analysis parameters (TechParams) for signal generation."""
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        """Spot transaction cost, stored as a one-way decimal fraction."""
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert from two-way basis points to a one-way decimal fraction
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        """Asset class of the backtest (eg. 'fx')."""
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            # fix: string concatenation uses +, not & (& raised TypeError)
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        """Trading instrument (eg. 'spot')."""
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            # fix: string concatenation uses +, not &
            self.logger.warning(instrument
                                + " is not a defined trading instrument.")

        self.__instrument = instrument
Example #2
0
class BacktestRequest(TimeSeriesRequest):
    """Time series request augmented with backtesting parameters
    (trading signal name, technical parameters, transaction costs, asset
    and instrument selection)."""

    def __init__(self):
        super(BacktestRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        """Name of the trading signal used in the backtest."""
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        """Technical analysis parameters (TechParams) for signal generation."""
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        """Spot transaction cost, stored as a one-way decimal fraction."""
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert from two-way basis points to a one-way decimal fraction
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        """Asset class of the backtest (eg. 'fx')."""
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            # fix: string concatenation uses +, not & (& raised TypeError)
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        """Trading instrument (eg. 'spot')."""
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            # fix: string concatenation uses +, not &
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument
Example #3
0
class TimeSeriesRequest:
    """Describes a market data request.

    Properties
    ----------
    data_source : eg. bbg, yahoo, quandl
    start_date / finish_date : date range (strings are parsed via date_parser)
    tickers : (can be list) eg. EURUSD
    category : eg. fx, equities, fixed_income, cal_event, fundamental
    freq_mult : frequency multiplier eg. 1
    freq : tick, intraday or daily
    gran_freq : granular frequency (tick, minute, hourly, daily, weekly, monthly, yearly)
    cut : data cut
    fields : (can be list)
    vendor_tickers / vendor_fields : (optional) vendor-specific names
    cache_algo : eg. internet, disk, memory - internet will forcibly download from the internet
    environment : eg. prod, backtest - old data is saved with prod, backtest will overwrite the last data point
    """

    def __init__(self, data_source = None,
                 start_date = None, finish_date = None, tickers = None, category = None, freq_mult = None, freq = None,
                 gran_freq = None, cut = None,
                 fields = None, cache_algo = None,
                 vendor_tickers = None, vendor_fields = None,
                 environment = None
                 ):

        self.logger = LoggerManager().getLogger(__name__)

        self.freq_mult = 1

        # only assign the attributes which were explicitly specified
        if data_source is not None: self.data_source = data_source
        if start_date is not None: self.start_date = start_date
        if finish_date is not None: self.finish_date = finish_date
        if tickers is not None: self.tickers = tickers
        if category is not None: self.category = category
        if freq_mult is not None: self.freq_mult = freq_mult
        if freq is not None: self.freq = freq
        if cut is not None: self.cut = cut
        if fields is not None: self.fields = fields
        if cache_algo is not None: self.cache_algo = cache_algo
        if vendor_tickers is not None: self.vendor_tickers = vendor_tickers
        if vendor_fields is not None: self.vendor_fields = vendor_fields
        if environment is not None: self.environment = environment

    @property
    def data_source(self):
        return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        valid_data_source = ['ats', 'bloomberg', 'dukascopy', 'gain', 'quandl', 'yahoo']

        if data_source not in valid_data_source:
            # fix: string concatenation uses +, not & (& raised TypeError)
            self.logger.warning(data_source + " is not a defined data source.")

        self.__data_source = data_source

    @property
    def category(self):
        return self.__category

    @category.setter
    def category(self, category):
        self.__category = category

    @property
    def tickers(self):
        return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        # always store tickers as a list
        if not isinstance(tickers, list):
            tickers = [tickers]

        self.__tickers = tickers

    @property
    def fields(self):
        return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if field_entry not in valid_fields:
                # fix: previously a no-op placeholder (i = 0); warn on unknown fields
                self.logger.warning(field_entry + " is not a valid field.")

        self.__fields = fields

    @property
    def vendor_tickers(self):
        return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        # fix: typo 'vednor_tickers' meant a non-list input was stored unwrapped
        if not isinstance(vendor_tickers, list):
            vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self):
        return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if not isinstance(vendor_fields, list):
            vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self):
        return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = ['tick', 'intraday', 'daily']

        if freq not in valid_freq:
            # fix: string concatenation uses +, not &
            self.logger.warning(freq + " is not a defined frequency")

        self.__freq = freq

    @property
    def gran_freq(self):
        return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        gran_freq = gran_freq.lower()

        valid_gran_freq = ['tick', 'minute', 'hourly', 'pseudodaily', 'daily', 'weekly', 'monthly', 'quarterly', 'yearly']

        if gran_freq not in valid_gran_freq:
            # fix: string concatenation uses +, not &
            self.logger.warning(gran_freq + " is not a defined frequency")

        # derive the coarse freq bucket from the granular frequency
        if gran_freq in ['minute', 'hourly']:
            self.__freq = 'intraday'
        elif gran_freq in ['tick']:
            self.__freq = 'tick'
        else:
            self.__freq = 'daily'

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self):
        return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult):
        self.__freq_mult = freq_mult

    @property
    def start_date(self):
        return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self):
        return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self):
        return self.__cut

    @cut.setter
    def cut(self, cut):
        self.__cut = cut

    def date_parser(self, date):
        """Parse a date string into a datetime, trying several common formats.

        Returns the input unchanged when it is not a string or no format
        matches (fix: previously relied on four bare excepts that also
        swallowed the TypeError raised by re-parsing an already-parsed
        datetime).
        """
        if isinstance(date, str):
            # eg. 'Jun 1 2005 01:33', '1 Jun 2005 01:33', 'Jun 1 2005', '1 Jun 2005'
            for date_format in ['%b %d %Y %H:%M', '%d %b %Y %H:%M', '%b %d %Y', '%d %b %Y']:
                try:
                    return datetime.strptime(date, date_format)
                except ValueError:
                    pass    # try the next format

        return date

    @property
    def cache_algo(self):
        return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = ['internet_load', 'internet_load_return', 'cache_algo', 'cache_algo_return']

        if cache_algo not in valid_cache_algo:
            self.logger.warning(cache_algo + " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self):
        return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment = ['prod', 'backtest']

        if environment not in valid_environment:
            # fix: string concatenation uses +, not &
            self.logger.warning(environment + " is not a defined environment.")

        self.__environment = environment
Example #4
0
class BacktestRequest(TimeSeriesRequest):
    """Time series request augmented with backtesting parameters for
    option and spot trading (transaction costs, asset, instrument, tenor,
    strike and delta threshold)."""

    def __init__(self):
        # fix: super() must name this class (was TimeSeriesRequest), otherwise
        # TimeSeriesRequest.__init__ is skipped in the MRO
        super(BacktestRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    @property
    def opt_tc_bp(self):
        """Option transaction cost, stored as a one-way decimal fraction."""
        return self.__opt_tc_bp

    @opt_tc_bp.setter
    def opt_tc_bp(self, opt_tc_bp):
        # convert from two-way basis points to a one-way decimal fraction
        self.__opt_tc_bp = opt_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def spot_tc_bp(self):
        """Spot transaction cost, stored as a one-way decimal fraction."""
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        # convert from two-way basis points to a one-way decimal fraction
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        """Asset class of the backtest (eg. 'fx')."""
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            # fix: string concatenation uses +, not & (& raised TypeError)
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        """Trading instrument (eg. 'spot')."""
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            # fix: string concatenation uses +, not &
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def tenor(self):
        """Option tenor (eg. '1M')."""
        return self.__tenor

    @tenor.setter
    def tenor(self, tenor):
        self.__tenor = tenor

    @property
    def strike(self):
        """Option strike (eg. 'atm')."""
        return self.__strike

    # fix: was @tenor.setter, which gave the 'strike' property tenor's getter
    @strike.setter
    def strike(self, strike):
        self.__strike = strike

    @property
    def delta_threshold(self):
        """Delta threshold used for rebalancing option hedges."""
        return self.__delta_threshold

    @delta_threshold.setter
    def delta_threshold(self, delta_threshold):
        self.__delta_threshold = delta_threshold
class LoaderDukasCopy(LoaderTemplate):
    """Loads tick market data from Dukascopy's public HTTP endpoint."""

    # path template for hourly tick files on the Dukascopy server
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        # fix: super() must name this class (was LoaderTemplate), otherwise
        # LoaderTemplate.__init__ is skipped in the MRO
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        # silence per-request chatter from the requests library
        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Dukascopy)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in [
                'daily', 'weekly', 'monthly', 'quarterly', 'yearly',
                'intraday', 'minute', 'hourly'
        ]):
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            data_frame = self.get_tick(time_series_request,
                                       time_series_request_vendor)

            if data_frame is not None: data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        # plain HTTP downloads - no persistent session to tear down
        return

    def get_tick(self, time_series_request, time_series_request_vendor):
        """Download tick data and relabel columns as 'ticker.field'."""

        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]
                                ] * (len(returned_fields))

            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):
        """Fetch every hourly tick file in the request's date range and concat them."""

        symbol = time_series_request.tickers[0]

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded (parallel version via joblib has pickle issues)
        df_list = [
            self.fetch_file(time, symbol)
            for time in self.hour_range(time_series_request.start_date,
                                        time_series_request.finish_date)
        ]

        try:
            return pandas.concat(df_list)
        except Exception:
            # nothing usable was downloaded for the whole range
            return None

    def fetch_file(self, time, symbol):
        """Download and decode one hourly .bi5 file; returns a DataFrame or None."""
        if time.hour % 24 == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(symbol=symbol,
                                          year=str(time.year).rjust(4, '0'),
                                          month=str(time.month).rjust(2, '0'),
                                          day=str(time.day).rjust(2, '0'),
                                          hour=str(time.hour).rjust(2, '0'))

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants(
            ).temp_pythalesians_folder + "/dkticks/" + tick_path

            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            # corrupt/empty download (eg. weekend hour with no ticks)
            return None

    def fetch_tick(self, tick_url):
        """Download raw compressed tick content, retrying up to 5 times."""
        tick_request = None

        for _ in range(5):
            try:
                tick_request = requests.get(tick_url)
                break
            except Exception:
                continue

        if tick_request is None:
            # fix: was self.logger(...) which calls the logger object (TypeError)
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        """Persist raw tick content to disk."""
        # with-statement guarantees the file handle is closed
        with open(out_path, "wb+") as data_file:
            data_file.write(content)

    def chunks(self, list, n):
        """Split a sequence into consecutive chunks of length n (n floored at 1)."""
        if n < 1:
            n = 1
        return [list[i:i + n] for i in range(0, len(list), n)]

    def retrieve_df(self, data, symbol, epoch):
        """Convert raw decoded tick rows into a bid/ask DataFrame indexed by date."""
        date, rows = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(
            data, epoch)

        df = pandas.DataFrame(data=rows,
                              columns=['temp', 'bid', 'ask', 'bidv', 'askv'],
                              index=date)
        # fix: drop returns a new frame - previously the result was discarded
        # and the 'temp' column survived
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without decimal point
        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        """Yield hourly datetimes from start_date up to (not including) end_date."""
        delta_t = end_date - start_date

        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)
        for n in range(int(delta_hours)):
            yield start_date + timedelta(hours=n)

    def get_daily_data(self):
        # daily data is not supported by this tick-only loader
        pass
class LoaderDukasCopy(LoaderTemplate):
    """Loads tick market data from Dukascopy's public HTTP endpoint."""

    # path template for hourly tick files on the Dukascopy server
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        # fix: super() must name this class (was LoaderTemplate), otherwise
        # LoaderTemplate.__init__ is skipped in the MRO
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        # silence per-request chatter from the requests library
        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Dukascopy)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly', 'intraday', 'minute', 'hourly']):
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            data_frame = self.get_tick(time_series_request, time_series_request_vendor)

            if data_frame is not None: data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        # plain HTTP downloads - no persistent session to tear down
        return

    def get_tick(self, time_series_request, time_series_request_vendor):
        """Download tick data and relabel columns as 'ticker.field'."""

        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]] * (len(returned_fields))

            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):
        """Fetch every hourly tick file in the request's date range and concat them."""

        symbol = time_series_request.tickers[0]

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded (parallel version via joblib has pickle issues)
        df_list = [self.fetch_file(time, symbol) for time in
                   self.hour_range(time_series_request.start_date, time_series_request.finish_date)]

        try:
            return pandas.concat(df_list)
        except Exception:
            # nothing usable was downloaded for the whole range
            return None

    def fetch_file(self, time, symbol):
        """Download and decode one hourly .bi5 file; returns a DataFrame or None."""
        if time.hour % 24 == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(
                symbol = symbol,
                year = str(time.year).rjust(4, '0'),
                month = str(time.month).rjust(2, '0'),
                day = str(time.day).rjust(2, '0'),
                hour = str(time.hour).rjust(2, '0')
            )

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants().temp_pythalesians_folder + "/dkticks/" + tick_path

            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            # corrupt/empty download (eg. weekend hour with no ticks)
            return None

    def fetch_tick(self, tick_url):
        """Download raw compressed tick content, retrying up to 5 times."""
        tick_request = None

        for _ in range(5):
            try:
                tick_request = requests.get(tick_url)
                break
            except Exception:
                continue

        if tick_request is None:
            # fix: was self.logger(...) which calls the logger object (TypeError)
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        """Persist raw tick content to disk."""
        # with-statement guarantees the file handle is closed
        with open(out_path, "wb+") as data_file:
            data_file.write(content)

    def chunks(self, list, n):
        """Split a sequence into consecutive chunks of length n (n floored at 1)."""
        if n < 1:
            n = 1
        return [list[i:i + n] for i in range(0, len(list), n)]

    def retrieve_df(self, data, symbol, epoch):
        """Convert raw decoded tick rows into a bid/ask DataFrame indexed by date."""
        date, rows = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(data, epoch)

        df = pandas.DataFrame(data = rows, columns=['temp', 'bid', 'ask', 'bidv', 'askv'], index = date)
        # fix: drop returns a new frame - previously the result was discarded
        # and the 'temp' column survived
        df = df.drop('temp', axis = 1)
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without decimal point
        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        """Yield hourly datetimes from start_date up to (not including) end_date."""
        delta_t = end_date - start_date

        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)
        for n in range(int(delta_hours)):
            yield start_date + timedelta(hours=n)

    def get_daily_data(self):
        # daily data is not supported by this tick-only loader
        pass