class TwitterPyThalesians:

    def __init__(self, *args, **kwargs):
        self.logger = LoggerManager().getLogger(__name__)

    def set_key(self, APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET):
        self.twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)

    def auto_set_key(self):
        self.twitter = Twython(Constants().APP_KEY, Constants().APP_SECRET,
                               Constants().OAUTH_TOKEN, Constants().OAUTH_TOKEN_SECRET)

    def update_status(self, msg, link = None, picture = None):
        # Twitter wraps each URL in 22 chars (t.co) and each picture in 23 chars;
        # 'link' is the number of links included in the message

        chars_lim = 140

        if link is not None: chars_lim = chars_lim - (22 * link)
        if picture is not None: chars_lim = chars_lim - 23

        if len(msg) > chars_lim:
            self.logger.info("Message too long for Twitter!")

        if picture is None:
            self.twitter.update_status(status=msg)
        else:
            # close the file handle once the upload request has been sent
            with open(picture, 'rb') as photo:
                self.twitter.update_status_with_media(status=msg, media=photo)
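A hypothetical usage sketch (the file name is illustrative, and auto_set_key assumes valid Twitter API keys in Constants):

twitter = TwitterPyThalesians()
twitter.auto_set_key()      # reads APP_KEY etc. from Constants

# one link (22 chars) and one picture (23 chars) are deducted from the 140 char limit
twitter.update_status("EUR/USD realised vol plot", link = 1, picture = 'output_data/vol.png')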
Example #2
class BacktestRequest(TimeSeriesRequest):

    def __init__(self):
        super(BacktestRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if not asset in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if not instrument in valid_instrument:
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument
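To illustrate the setters above with a hypothetical sketch: spot_tc_bp stores a quoted round-trip transaction cost in basis points as a one-way proportional cost (divide by 2 for one side of the trade, then by 100 * 100 to convert basis points to a decimal):

br = BacktestRequest()
br.signal_name = 'EURUSD trend'     # illustrative signal name
br.asset = 'fx'
br.instrument = 'spot'

br.spot_tc_bp = 2.0     # stored as 2.0 / (2.0 * 100.0 * 100.0) = 0.0001, ie. 1bp per leg as a decimal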
class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        if data_frame is None or len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

            # tidy up tickers into a format that is more easily translatable
            returned_tickers = [x.replace(' - Value', '') for x in returned_tickers]
            returned_tickers = [x.replace(' - VALUE', '') for x in returned_tickers]
            returned_tickers = [x.replace('.', '/') for x in returned_tickers]

            fields = self.translate_from_vendor_field(['close' for x in returned_tickers], time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                data_frame = Quandl.get(time_series_request.tickers, authtoken=Constants().quandl_api_key,
                                        trim_start=time_series_request.start_date,
                                        trim_end=time_series_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) + " to download from Quandl")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
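A hypothetical end-to-end call (tickers and dates are illustrative, and a Quandl API key is assumed in Constants):

tsr = TimeSeriesRequest(data_source = 'quandl',
                        start_date = '01 Jan 2010', finish_date = '01 Jan 2015',
                        tickers = ['EURUSD'], vendor_tickers = ['CURRFX/EURUSD'],
                        fields = ['close'], freq = 'daily', cache_algo = 'internet_load_return')

df = LoaderQuandl().load_ticker(tsr)    # columns like 'EURUSD.close', index named 'Date'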
    def __init__(self, symbols, fields, start=None, end=None, period='DAILY', addtl_sets=None, ignore_security_error=0, ignore_field_error=0):
        """ Historical data request for bbg.

        Parameters
        ----------
        symbols : string or list
        fields : string or list
        start : start date (if None then use 1 year ago)
        end : end date (if None then use today)
        period : ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')
        ignore_field_error : bool
        ignore_security_error : bool

        """

        Request.__init__(self, ignore_security_error=ignore_security_error, ignore_field_error=ignore_field_error)

        assert period in ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')
        self.symbols = [symbols] if isinstance(symbols, str) else symbols
        self.fields = [fields] if isinstance(fields, str) else fields

        if start is None:
            start = datetime.today() - timedelta(365)   # by default download the past year
        if end is None:
            end = datetime.today()

        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.period = period

        self.logger = LoggerManager().getLogger(__name__)

        # response related
        self.response = {}
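A hypothetical construction, assuming the enclosing class is the Bloomberg HistoricalDataRequest (string symbols/fields are wrapped into lists, and end defaults to today):

req = HistoricalDataRequest('EURUSD Curncy', 'PX_LAST', start = '2014-01-01', period = 'DAILY')
# req.symbols == ['EURUSD Curncy'], req.fields == ['PX_LAST']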
    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """
        Intraday bar request for bbg

        Parameters
        ----------
        symbol : string
        interval : number of minutes
        start : start date
        end : end date (if None then use today)
        event : (TRADE,BID,ASK,BEST_BID,BEST_ASK)

        """

        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        if start is None:
            start = datetime.today() - timedelta(30)

        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related
        self.response = defaultdict(list)
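Similarly for the intraday request, a hypothetical sketch (start defaults to 30 days ago):

req = IntradayBarRequest('EURUSD Curncy', 5, event = 'TRADE')    # 5 minute trade bars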
    def __init__(self, data_source = None,
                 start_date = None, finish_date = None, tickers = None, category = None, freq_mult = None, freq = None,
                 gran_freq = None, cut = None,
                 fields = None, cache_algo = None,
                 vendor_tickers = None, vendor_fields = None,
                 environment = None
                 ):

        self.logger = LoggerManager().getLogger(__name__)

        self.freq_mult = 1

        if data_source is not None: self.data_source = data_source
        if start_date is not None: self.start_date = start_date
        if finish_date is not None: self.finish_date = finish_date
        if tickers is not None: self.tickers = tickers
        if category is not None: self.category = category
        if freq_mult is not None: self.freq_mult = freq_mult
        if freq is not None: self.freq = freq
        if cut is not None: self.cut = cut
        if fields is not None: self.fields = fields
        if cache_algo is not None: self.cache_algo = cache_algo
        if vendor_tickers is not None: self.vendor_tickers = vendor_tickers
        if vendor_fields is not None: self.vendor_fields = vendor_fields
        if environment is not None: self.environment = environment
Example #7
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.SCALE_FACTOR = 3
        self.DEFAULT_PLOT_ENGINE = GraphicsConstants().plotfactory_default_adapter

        return
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(Constants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(Constants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(Constants().econ_country_groups)

        self.time_series_factory = LightTimeSeriesFactory()
    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        if Constants().default_time_series_factory == 'lighttimeseriesfactory':
            self.time_series_factory = LightTimeSeriesFactory()
        else:
            self.time_series_factory = CachedTimeSeriesFactory()
        return
    def __init__(self):
        super(StrategyTemplate, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        ##### FILL IN WITH YOUR OWN PARAMETERS FOR display, dumping, TSF etc.
        self.tsfactory = LightTimeSeriesFactory()
        self.DUMP_CSV = 'output_data/'
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.FINAL_STRATEGY = 'Thalesians FX CTA'
        self.SCALE_FACTOR = 3
        
        return
class WebDataTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def download_raw_data(self):
        return

    @abc.abstractmethod
    def construct_indicator(self):
        return

    def dump_indicator(self):

        indicator_group = self.raw_indicator # self.raw_indicator.join(self.processed_indicator, how='outer')

        self.logger.info("About to write all web indicators")
        indicator_group.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y %H:%M:%S')
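Since download_raw_data and construct_indicator are abstract, a concrete subclass supplies them; a minimal hypothetical sketch (the URL and paths are illustrative):

class MyWebIndicator(WebDataTemplate):

    def __init__(self):
        WebDataTemplate.__init__(self)
        self._csv_indicator_dump = 'output_data/web_indicator.csv'     # consumed by dump_indicator

    def download_raw_data(self):
        # source-specific download of the raw series
        self.raw_indicator = pandas.read_csv('http://example.com/raw.csv', index_col = 0, parse_dates = True)

    def construct_indicator(self):
        # derive a smoothed indicator from the raw download
        self.raw_indicator = self.raw_indicator.rolling(20).mean()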
    def __init__(self, symbols, fields, overrides=None, response_type='frame', ignore_security_error=0, ignore_field_error=0):
        """
        response_type: (frame, map) how to return the results
        """
        assert response_type in ('frame', 'map')
        Request.__init__(self, ignore_security_error=ignore_security_error, ignore_field_error=ignore_field_error)
        self.symbols = [symbols] if isinstance(symbols, str) else symbols
        self.fields = [fields] if isinstance(fields, str) else fields
        self.overrides = overrides
        # response related
        self.response = {} if response_type == 'map' else defaultdict(list)
        self.response_type = response_type

        self.logger = LoggerManager().getLogger(__name__)
class DataLoaderTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    def load_database(self, key = None):
        tsio = TimeSeriesIO()
        tsc = TimeSeriesCalcs()

        file = self._hdf5

        if key is not None:
            file = self._hdf5 + key + ".h5"

        # if cached file exists, use that, otherwise load CSV
        if os.path.isfile(file):
            self.logger.info("About to load market database from HDF5...")
            self.news_database = tsio.read_time_series_cache_from_disk(file)
            self.news_database = self.preprocess(self.news_database)
        else:
            self.logger.info("About to load market database from CSV...")
            self.news_database = self.load_csv()

        return self.news_database

    @abc.abstractmethod
    def load_csv(self):
        return

    def get_database(self, key):
        return self.news_database

    @abc.abstractmethod
    def preprocess(self, df):
        return
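load_database reads the HDF5 cache when it exists and falls back to CSV otherwise, so a concrete loader only needs load_csv and preprocess; a hypothetical sketch (paths are illustrative):

class MyNewsLoader(DataLoaderTemplate):

    def __init__(self):
        DataLoaderTemplate.__init__(self)
        self._hdf5 = 'cache/news'   # load_database appends key + '.h5' when a key is given

    def load_csv(self):
        return pandas.read_csv('input_data/news.csv', index_col = 0, parse_dates = True)

    def preprocess(self, df):
        return df.dropna()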
class CreateDataIndexTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def create_indicator(self):
        return

    @abc.abstractmethod
    def aggregate_news_data(self, raw_database):
        return

    @abc.abstractmethod
    def get_cached_aggregate(self):
        return

    def grab_indicator(self):
        return self.indicator

    def grab_econ_indicator(self):
        return self.indicator_econ

    def grab_final_indicator(self):
        return self.indicator_final

    def truncate_indicator(self, daily_ind, match):
        cols = daily_ind.columns.values

        to_include = []

        for i in range(0, len(cols)):
            if match in cols[i]:
                to_include.append(i)

        return daily_ind[daily_ind.columns[to_include]]

    def dump_indicators(self):
        tsf = TimeSeriesFilter()
        self.logger.info("About to write all indicators to CSV")
        self.indicator.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y')

        if (self._csv_econ_indicator_dump is not None):
            self.logger.info("About to write economy based indicators to CSV")
            self.indicator_econ.to_csv(self._csv_econ_indicator_dump, date_format='%d/%m/%Y')

        self.logger.info("About to write final indicators to CSV")

        # remove weekends and remove start of series
        if (self._csv_final_indicator_dump is not None):
            indicator_final_copy = tsf.filter_time_series_by_holidays(self.indicator_final, cal = 'WEEKDAY')
            indicator_final_copy = tsf.filter_time_series_by_date(
                start_date="01 Jan 2000", finish_date = None, data_frame=indicator_final_copy)

            indicator_final_copy.to_csv(self._csv_final_indicator_dump, date_format='%d/%m/%Y')
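As an illustration, truncate_indicator keeps only the columns whose names contain the match string:

daily_ind = pandas.DataFrame(columns = ['EURUSD news', 'GBPUSD news', 'EURJPY news'])

# given a subclass instance 'indexer', returns just the 'EURUSD news' and 'EURJPY news' columns
eur_only = indexer.truncate_indicator(daily_ind, 'EUR')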
Example #17
    def __init__(self):
        super(BBGLowLevelTick, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.TICK_DATA = blpapi.Name("tickData")
        self.COND_CODE = blpapi.Name("conditionCodes")
        self.TICK_SIZE = blpapi.Name("size")
        self.TIME = blpapi.Name("time")
        self.TYPE = blpapi.Name("type")
        self.VALUE = blpapi.Name("value")
        self.RESPONSE_ERROR = blpapi.Name("responseError")
        self.CATEGORY = blpapi.Name("category")
        self.MESSAGE = blpapi.Name("message")
        self.SESSION_TERMINATED = blpapi.Name("SessionTerminated")
class TimeSeriesRequest:

    # properties
    #
    # data_source eg. bbg, yahoo, quandl
    # start_date
    # finish_date
    # tickers (can be list) eg. EURUSD
    # category (eg. fx, equities, fixed_income, cal_event, fundamental)
    # freq_mult (eg. 1)
    # freq
    # gran_freq (eg. tick, minute, hourly, daily, weekly, monthly, quarterly, yearly)
    # fields (can be list)
    # vendor_tickers (optional)
    # vendor_fields (optional)
    # cache_algo (eg. internet, disk, memory) - internet will forcibly download from the internet
    # environment (eg. prod, backtest) - old data is saved with prod, backtest will overwrite the last data point
    def __init__(self, data_source = None,
                 start_date = None, finish_date = None, tickers = None, category = None, freq_mult = None, freq = None,
                 gran_freq = None, cut = None,
                 fields = None, cache_algo = None,
                 vendor_tickers = None, vendor_fields = None,
                 environment = None
                 ):

        self.logger = LoggerManager().getLogger(__name__)

        self.freq_mult = 1

        if data_source is not None: self.data_source = data_source
        if start_date is not None: self.start_date = start_date
        if finish_date is not None: self.finish_date = finish_date
        if tickers is not None: self.tickers = tickers
        if category is not None: self.category = category
        if freq_mult is not None: self.freq_mult = freq_mult
        if freq is not None: self.freq = freq
        if cut is not None: self.cut = cut
        if fields is not None: self.fields = fields
        if cache_algo is not None: self.cache_algo = cache_algo
        if vendor_tickers is not None: self.vendor_tickers = vendor_tickers
        if vendor_fields is not None: self.vendor_fields = vendor_fields
        if environment is not None: self.environment = environment

    @property
    def data_source(self):
        return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        valid_data_source = ['ats', 'bloomberg', 'dukascopy', 'gain', 'quandl', 'yahoo']

        if not data_source in valid_data_source:
            self.logger.warning(data_source + " is not a defined data source.")

        self.__data_source = data_source

    @property
    def category(self):
        return self.__category

    @category.setter
    def category(self, category):
        self.__category = category

    @property
    def tickers(self):
        return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        if not isinstance(tickers, list):
            tickers = [tickers]

        self.__tickers = tickers

    @property
    def fields(self):
        return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if not field_entry in valid_fields:
                # TODO add proper error checking
                # self.logger.warning(field_entry + " is not a valid field.")
                pass

        self.__fields = fields

    @property
    def vendor_tickers(self):
        return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        if not isinstance(vendor_tickers, list):
            vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self):
        return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if not isinstance(vendor_fields, list):
            vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self):
        return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = ['tick', 'intraday', 'daily']

        if not freq in valid_freq:
            self.logger.warning(freq + " is not a defined frequency.")

        self.__freq = freq

    @property
    def gran_freq(self):
        return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        gran_freq = gran_freq.lower()

        valid_gran_freq = ['tick', 'minute', 'hourly', 'pseudodaily', 'daily', 'weekly', 'monthly', 'quarterly', 'yearly']

        if not gran_freq in valid_gran_freq:
            self.logger.warning(gran_freq + " is not a defined frequency.")

        if gran_freq in ['minute', 'hourly']:
            self.__freq = 'intraday'
        elif gran_freq in ['tick']:
            self.__freq = 'tick'
        else:
            self.__freq = 'daily'

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self):
        return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult):
        self.__freq_mult = freq_mult

    @property
    def start_date(self):
        return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self):
        return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self):
        return self.__cut

    @cut.setter
    def cut(self, cut):
        self.__cut = cut

    def date_parser(self, date):
        if isinstance(date, str):
            # try several formats eg. 'Jun 1 2005 01:33', '1 Jun 2005 01:33', 'Jun 1 2005', '1 Jun 2005'
            for date_format in ['%b %d %Y %H:%M', '%d %b %Y %H:%M', '%b %d %Y', '%d %b %Y']:
                try:
                    date = datetime.strptime(date, date_format)
                    break
                except ValueError:
                    continue

        return date
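For example, strings in any of the four supported formats are parsed, while other types pass through unchanged:

tsr = TimeSeriesRequest()
tsr.date_parser('Jun 1 2005 01:33')     # datetime(2005, 6, 1, 1, 33)
tsr.date_parser('1 Jun 2005')           # datetime(2005, 6, 1, 0, 0)
tsr.date_parser(datetime(2005, 6, 1))   # non-strings pass through unchanged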

    @property
    def cache_algo(self):
        return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = ['internet_load', 'internet_load_return', 'cache_algo', 'cache_algo_return']


        if not cache_algo in valid_cache_algo:
            self.logger.warning(cache_algo + " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self):
        return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment= ['prod', 'backtest']

        if not environment in valid_environment:
            self.logger.warning(environment + " is not a defined environment.")

        self.__environment = environment
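Putting the properties together, a typical daily request might look like this (tickers, fields and cut are illustrative):

tsr = TimeSeriesRequest(
    data_source = 'bloomberg',
    start_date = '01 Jan 2014', finish_date = '01 Jan 2015',    # parsed by date_parser
    tickers = ['EURUSD'], vendor_tickers = ['EURUSD Curncy'],
    fields = ['close'], vendor_fields = ['PX_LAST'],
    freq = 'daily', cut = 'NYC',
    cache_algo = 'internet_load_return', environment = 'backtest')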
Example #19
class TradeAnalysis:
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime(
            "%Y%m%d") + ' '
        self.scale_factor = 3
        return

    def run_strategy_returns_stats(self, strategy):
        """
        run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        strategy : StrategyTemplate
            defining trading strategy

        """

        pnl = strategy.get_strategy_pnl()
        tz = TimeSeriesTimezone()
        tsc = TimeSeriesCalcs()

        # PyFolio assumes UTC time based DataFrames (so force this localisation)
        try:
            pnl = tz.localise_index_as_UTC(pnl)
        except Exception:
            pass

        # set the matplotlib style sheet & defaults
        try:
            matplotlib.rcdefaults()
            plt.style.use(Constants().
                          plotfactory_pythalesians_style_sheet['pythalesians'])
        except Exception:
            pass

        # TODO for intraday strategies, make daily

        # convert DataFrame (assumed to have only one column) to Series
        pnl = tsc.calculate_returns(pnl)
        pnl = pnl[pnl.columns[0]]

        fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

        try:
            plt.savefig(strategy.DUMP_PATH + "stats.png")
        except Exception:
            pass

        plt.show()

    def run_tc_shock(self, strategy, tc=None):
        if tc is None: tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp': x} for x in tc]
        pretty_portfolio_names = [str(x) + 'bp'
                                  for x in tc]  # names of the portfolio
        parameter_type = 'TC analysis'  # broad type of parameter name

        return self.run_arbitrary_sensitivity(
            strategy,
            parameter_list=parameter_list,
            pretty_portfolio_names=pretty_portfolio_names,
            parameter_type=parameter_type)

    ###### Parameters and signal generations (need to be customised for every model)
    def run_arbitrary_sensitivity(self,
                                  strat,
                                  parameter_list=None,
                                  parameter_names=None,
                                  pretty_portfolio_names=None,
                                  parameter_type=None):

        asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()

        port_list = None
        tsd_list = []

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.br = br  # for calculating signals

            signal_df = strat.construct_signal(spot_df, spot_df2,
                                               br.tech_params, br)

            cash_backtest = CashBacktest()
            self.logger.info("Calculating... " + pretty_portfolio_names[i])

            cash_backtest.calculate_trading_PnL(br, asset_df, signal_df)
            tsd_list.append(cash_backtest.get_portfolio_pnl_tsd())
            stats = str(cash_backtest.get_portfolio_pnl_desc()[0])

            port = cash_backtest.get_cumportfolio().resample('B').mean()
            port.columns = [pretty_portfolio_names[i] + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()

        pf = PlotFactory()
        gp = GraphProperties()

        ir = [t.inforatio()[0] for t in tsd_list]

        # gp.color = 'Blues'
        # plot all the variations
        gp.resample = 'B'
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + '.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type
        pf.plot_line_graph(port_list, adapter='pythalesians', gp=gp)

        # plot all the IR in a bar chart form (can be easier to read!)
        gp = GraphProperties()
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type
        summary = pandas.DataFrame(index=pretty_portfolio_names,
                                   data=ir,
                                   columns=['IR'])
        pf.plot_bar_graph(summary, adapter='pythalesians', gp=gp)

        return port_list

    ###### Parameters and signal generations (need to be customised for every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self,
                                             strat,
                                             parameter_list=None,
                                             pretty_portfolio_names=None,
                                             strip=None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = strat.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[
                i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])
            strat.br = br
            strat.construct_strategy(br=br)

            strat.plot_strategy_pnl()
            strat.plot_strategy_leverage()
            strat.plot_strategy_group_benchmark_pnl(strip=strip)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()
        strat.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, strat):
        from pythalesians.economics.seasonality.seasonality import Seasonality
        from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

        tsc = TimeSeriesCalcs()
        seas = Seasonality()
        strat.construct_strategy()
        pnl = strat.get_strategy_pnl()

        # get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = tsc.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average=True)

        # get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = tsc.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        gp = GraphProperties()
        pf = PlotFactory()

        # Plotting spot over day of month/month of year
        gp.color = 'Blues'
        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality day of month.png'
        gp.title = strat.FINAL_STRATEGY + ' day of month seasonality'
        gp.display_legend = False
        gp.color_2_series = [bus_day.columns[-1]]
        gp.color_2 = ['red']  # red, pink
        gp.linewidth_2 = 4
        gp.linewidth_2_series = [bus_day.columns[-1]]
        gp.y_axis_2_series = [bus_day.columns[-1]]

        pf.plot_line_graph(bus_day, adapter='pythalesians', gp=gp)

        gp = GraphProperties()

        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality month of year.png'
        gp.title = strat.FINAL_STRATEGY + ' month of year seasonality'

        pf.plot_line_graph(month, adapter='pythalesians', gp=gp)

        return month
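A hypothetical driver for the analysis above, given a concrete StrategyTemplate subclass:

strategy = MyFXTrendStrategy()      # hypothetical StrategyTemplate subclass
ta = TradeAnalysis()

ta.run_strategy_returns_stats(strategy)                             # PyFolio tear sheet
port_list = ta.run_tc_shock(strategy, tc = [0, 0.5, 1.0, 2.0])      # P&L under 0-2bp costs
month = ta.run_day_of_month_analysis(strategy)                      # seasonality plots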
Example #20
class LoaderBBG(LoaderTemplate):
    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events times/dates separately needs ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(
                    time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index(
                    'release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(
                        time_series_request, time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pandas.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request,
                                                 time_series_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[
                0]

            data_frame = self.download_intraday(time_series_request_vendor)

            cols = data_frame.columns.values

            # tz_localize returns a new DataFrame, so assign the result
            data_frame = data_frame.tz_localize('UTC')
            cols = time_series_request.tickers[0] + "." + cols
            data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned")

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor,
                           time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " +
                          time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # NOTE: convert_objects is deprecated (and later removed) in newer pandas;
            # pandas.to_numeric / pandas.to_datetime with errors='coerce' are the modern equivalents
            data_frame = data_frame.convert_objects(convert_dates='coerce',
                                                    convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
Example #21
class BBGLowLevelIntraday(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame, data_frame_slice):
        # NOTE: DataFrame.append is removed in pandas 2.x; pandas.concat is the modern equivalent
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers[0]    # get 1st ticker only!
        self._options.event = "TRADE"
        self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        self.logger.info("Processing intraday data for " + str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pandas.DataFrame(data = data_matrix, index = date_index,
        #                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## for loop method is a touch slower
        # time_list = []
        # data_table = []

        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                  bar.getElementAsFloat(self.HIGH),
        #                  bar.getElementAsFloat(self.LOW),
        #                  bar.getElementAsFloat(self.CLOSE),
        #                  bar.getElementAsInteger(self.VOLUME),
        #                  bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # each price time point has multiple fields - marginally quicker
        bar_pairs = [([bar.getElementAsFloat(self.OPEN),
                        bar.getElementAsFloat(self.HIGH),
                        bar.getElementAsFloat(self.LOW),
                        bar.getElementAsFloat(self.CLOSE),
                        bar.getElementAsInteger(self.VOLUME),
                        bar.getElementAsInteger(self.NUM_EVENTS)],
                        bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_pairs))
        time_list = list(map(itemgetter(1), bar_pairs))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except IndexError:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data = data_table, index = time_list,
                      columns=['open', 'high', 'low', 'close', 'volume', 'events'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.set("eventType", self._options.event)
        request.set("interval", self._options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        if self._options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request)
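The itemgetter split of the (fields, time) pairs in process_message is equivalent to unzipping with zip; a small standalone sketch (values are made up):

pairs = [([1.10, 1.12, 1.09, 1.11, 500, 42], '2015-01-01 09:00'),
         ([1.11, 1.13, 1.10, 1.12, 650, 51], '2015-01-01 09:01')]

data_table, time_list = map(list, zip(*pairs))   # same result as the itemgetter version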
Example #24
class BBGLowLevelRef(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        data[(field_name, ticker)][index] = re.findall(r'"(.*?)"', "%s" % row)[0]

                        index = index + 1
                # else:
                    # vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                    # print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")
                print(errorInfo.getElementAsString("category"), ":",
                      fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if (not(data_frame.empty)):
            data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            self.logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)    # force GMT time
        self.add_override(request, 'START_DT', self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT', self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request: " + str(request))
        session.sendRequest(request)
Example #25
    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return
Example #26
class LightTimeSeriesFactory:
    _time_series_cache = {} # shared across all instances of object!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library

        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)

        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series

        """
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader
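
    # A minimal usage sketch: get_loader simply dispatches on the source string,
    # eg. (assuming the Quandl loader and its dependencies are installed)
    #
    #   factory = LightTimeSeriesFactory()
    #   loader = factory.get_loader('quandl')   # returns a LoaderQuandl instance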

    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None : create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            # note: requires self.config (ConfigManager) to be initialised in the constructor
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.data_source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else: data_frame_agg = self.download_daily(time_series_request, loader)

        if('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            if loader is not None and kill_session == True: loader.kill_session()

        if(time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, do not return any time series."); return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except:
                return None
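
    # A hedged usage sketch of harvest_time_series; the ticker/vendor ticker pair
    # below is illustrative, not a tested configuration:
    #
    #   request = TimeSeriesRequest(
    #       start_date = '01 Jan 2015', finish_date = '01 Jan 2016',
    #       freq = 'daily', data_source = 'quandl',
    #       tickers = ['EURUSD'], vendor_tickers = ['FRED/DEXUSEU'],
    #       fields = ['close'], vendor_fields = ['close'],
    #       cache_algo = 'internet_load_return')
    #
    #   df = LightTimeSeriesFactory().harvest_time_series(request)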

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker = None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if(isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # handle intraday ticker calls separately one by one
        for ticker in time_series_request.tickers:
            time_series_request_single = copy.copy(time_series_request)
            time_series_request_single.tickers = ticker

            if hasattr(time_series_request, 'vendor_tickers'):
                time_series_request_single.vendor_tickers = time_series_request.vendor_tickers[ticker_cycle]
                ticker_cycle = ticker_cycle + 1

            # we downscale into float32, to avoid memory problems in Python (32 bit)
            # data is stored on disk as float32 anyway
            data_frame_single = loader.load_ticker(time_series_request_single)

            # if the vendor doesn't provide any data, don't attempt to append
            if data_frame_single is not None:
                if data_frame_single.empty == False:
                    data_frame_single.index.name = 'Date'
                    data_frame_single = data_frame_single.astype('float32')

                    # if you call for returning multiple tickers, be careful with memory considerations!
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

            # key = self.create_category_key(time_series_request, ticker)
            # fname = self.create_cache_file_name(key)
            # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file
        data_frame_agg = loader.load_ticker(time_series_request)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str

        """
        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'): category = time_series_request.category

        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None): key = category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else: key = category + '.' + source + '.' + freq + '.' + cut

        return key
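
    # For illustration, a request with category 'fx', data source 'bloomberg',
    # freq 'daily', cut 'NYC' and ticker 'EURUSD' yields the key
    # 'fx.bloomberg.daily.NYC.EURUSD' (the ticker suffix is omitted when
    # ticker is None)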

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
Example #27
0
class TimeSeriesIO:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """
        write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if(create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                # append to an existing workbook (needs the openpyxl engine;
                # xlsxwriter cannot modify a file in place)
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """
        write_time_series_to_excel_writer - writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff = None, dateparse = None,
                            postfix = '.close', intraday_tz = 'UTC'):
        """
        read_excel_data_frame - Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column name
        intraday_tz : str
            timezone of the file if it contains intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff = cutoff, dateparse = dateparse,
                            postfix = postfix, intraday_tz = intraday_tz, excel_sheet = excel_sheet)

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame):
        """
        write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        """

        h5_filename_temp = self.get_h5_filename(fname + ".temp")
        h5_filename = self.get_h5_filename(fname)

        # delete any stale temporary copy before writing
        try:
            os.remove(h5_filename_temp)
        except OSError: pass

        store = pandas.HDFStore(h5_filename_temp, complib="blosc", complevel=9)

        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        store['data'] = data_frame
        store.close()

        # delete the old copy
        try:
            os.remove(h5_filename)
        except: pass

        # once written to disk rename
        os.rename(h5_filename_temp, h5_filename)

    def get_h5_filename(self, fname):
        """
        get_h5_filename - Strips h5 off filename returning first portion of filename

        Parameters
        ----------
        fname : str
            h5 filename to strip

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"
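
    # For example, get_h5_filename('cache/fx-daily') returns 'cache/fx-daily.h5',
    # while get_h5_filename('cache/fx-daily.h5') is returned unchanged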

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields = None):
        """
        write_r_compatible_hdf_dataframe - Writes a DataFrame to disk as an R-compatible HDF5 file

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.values

        # ensure a plain list, so it can be concatenated with the date/time columns below
        fields = list(fields)

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname):
        """
        read_time_series_cache_from_disk - Reads time series cache from disk

        Parameters
        ----------
        fname : str
            file to be read from

        Returns
        -------
        DataFrame
        """

        if os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff = None, dateparse = None,
                            postfix = '.close', intraday_tz = 'UTC', excel_sheet = None):
        """
        read_csv_data_frame - Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column name
        intraday_tz : str (optional)
            timezone of the file if it contains intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if(freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col = 0, parse_dates = True, date_parser = dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col = 0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates = 'coerce')

            else:
                if excel_sheet is None:
                    data_frame = pandas.read_csv(f_name, index_col=0, parse_dates =["DATE"], date_parser = dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col = 0, na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                        int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if(freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame
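
    # A minimal usage sketch (the file path is hypothetical): read a 1-minute
    # intraday CSV, suffix every column with '.close' and localise the index to UTC
    #
    #   tsio = TimeSeriesIO()
    #   df = tsio.read_csv_data_frame('EURUSD_1min.csv', 'intraday',
    #                                 postfix = '.close', intraday_tz = 'UTC')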

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        """
        convert_csv_data_frame - Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(
            category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """
        clean_csv_file - Cleans up CSV file (removing null characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
Example #28
 def __init__(self):
     self.logger = LoggerManager().getLogger(__name__)
     self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime(
         "%Y%m%d") + ' '
     self.scale_factor = 3
     return
Example #29
 def __init__(self):
     self.logger = LoggerManager().getLogger(__name__)

from pythalesians.market.requests.timeseriesrequest import TimeSeriesRequest
from pythalesians.market.loaders.lighttimeseriesfactory import LightTimeSeriesFactory
from pythalesians.util.fxconv import FXConv

# for logging
from pythalesians.util.loggermanager import LoggerManager

# for signal generation
from pythalesians.timeseries.techind.techindicator import TechIndicator
from pythalesians.timeseries.techind.techparams import TechParams

# for plotting
from pythalesians.graphics.graphs.graphproperties import GraphProperties
from pythalesians.graphics.graphs.plotfactory import PlotFactory

logger = LoggerManager().getLogger(__name__)

import datetime

cash_backtest = CashBacktest()
br = BacktestRequest()
fxconv = FXConv()

# get all asset data
br.start_date = "02 Jan 1990"
br.finish_date = datetime.datetime.utcnow()
br.spot_tc_bp = 2.5  # 2.5 bps bid/ask spread
br.ann_factor = 252

# have vol target for each signal
br.signal_vol_adjust = True
Example #31
 def __init__(self):
     self.logger = LoggerManager().getLogger(__name__)
     self._pnl = None
     self._portfolio = None
     return

from pythalesians.market.requests.timeseriesrequest import TimeSeriesRequest
from pythalesians.market.loaders.lighttimeseriesfactory import LightTimeSeriesFactory
from pythalesians.util.fxconv import FXConv

# for logging
from pythalesians.util.loggermanager import LoggerManager

# for signal generation
from pythalesians.timeseries.techind.techindicator import TechIndicator
from pythalesians.timeseries.techind.techparams import TechParams

# for plotting
from pythalesians_graphics.graphs.graphproperties import GraphProperties
from pythalesians_graphics.graphs import PlotFactory

logger = LoggerManager().getLogger(__name__)

import datetime

cash_backtest = CashBacktest()
br = BacktestRequest()
fxconv = FXConv()

# get all asset data
br.start_date = "02 Jan 1990"
br.finish_date = datetime.datetime.utcnow()
br.spot_tc_bp = 2.5                             # 2.5 bps bid/ask spread
br.ann_factor = 252

# have vol target for each signal
br.signal_vol_adjust = True
Example #33
 def __init__(self, *args, **kwargs):
     self.logger = LoggerManager().getLogger(__name__)
class FXCrossFactory:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        if Constants().default_time_series_factory == 'lighttimeseriesfactory':
            self.time_series_factory = LightTimeSeriesFactory()
        else:
            self.time_series_factory = CachedTimeSeriesFactory()
        return

    def get_fx_cross_tick(self, start, end, cross,
                     cut = "NYC", source = "gain", cache_algo='cache_algo_return', type = 'spot'):

        if isinstance(cross, str):
            cross = [cross]

        time_series_request = TimeSeriesRequest()
        time_series_factory = self.time_series_factory
        data_frame_agg = None

        time_series_request.gran_freq = "tick"                  # tick

        time_series_request.freq_mult = 1                       # 1 min
        time_series_request.cut = cut                           # NYC/BGN ticker
        time_series_request.fields = ['bid', 'ask']             # bid/ask field only
        time_series_request.cache_algo = cache_algo             # cache_algo_only, cache_algo_return, internet_load

        time_series_request.environment = 'backtest'
        time_series_request.start_date = start
        time_series_request.finish_date = end
        time_series_request.data_source = source

        time_series_request.category = 'fx'

        for cr in cross:

            if (type == 'spot'):
                time_series_request.tickers = cr

                cross_vals = time_series_factory.harvest_time_series(time_series_request)
                cross_vals.columns = [cr + '.bid', cr + '.ask']

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg


    def get_fx_cross(self, start, end, cross,
                     cut = "NYC", source = "bloomberg", freq = "intraday", cache_algo='cache_algo_return', type = 'spot'):

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start, end, cross,
                     cut = cut, source = source, cache_algo = cache_algo, type = 'spot')

        if isinstance(cross, str):
            cross = [cross]

        time_series_request = TimeSeriesRequest()
        time_series_factory = self.time_series_factory
        time_series_calcs = TimeSeriesCalcs()
        data_frame_agg = None

        if freq == 'intraday':
            time_series_request.gran_freq = "minute"                # intraday

        elif freq == 'daily':
            time_series_request.gran_freq = "daily"                 # intraday

        time_series_request.freq_mult = 1                       # 1 min
        time_series_request.cut = cut                           # NYC/BGN ticker
        time_series_request.fields = 'close'                    # close field only
        time_series_request.cache_algo = cache_algo             # cache_algo_only, cache_algo_return, internet_load

        time_series_request.environment = 'backtest'
        time_series_request.start_date = start
        time_series_request.finish_date = end
        time_series_request.data_source = source

        for cr in cross:
            base = cr[0:3]
            terms = cr[3:6]

            if (type == 'spot'):
                # non-USD crosses
                if base != 'USD' and terms != 'USD':
                    base_USD = self.fxconv.correct_notation('USD' + base)
                    terms_USD = self.fxconv.correct_notation('USD' + terms)

                    # TODO check if the cross exists in the database

                    # download base USD cross
                    time_series_request.tickers = base_USD
                    time_series_request.category = self.fxconv.em_or_g10(base, freq)
                    base_vals = time_series_factory.harvest_time_series(time_series_request)

                    # download terms USD cross
                    time_series_request.tickers = terms_USD
                    time_series_request.category = self.fxconv.em_or_g10(terms, freq)
                    terms_vals = time_series_factory.harvest_time_series(time_series_request)

                    if (base_USD[0:3] == 'USD'):
                        base_vals = 1 / base_vals
                    if (terms_USD[0:3] == 'USD'):
                        terms_vals = 1 / terms_vals

                    base_vals.columns = ['temp']
                    terms_vals.columns = ['temp']
                    cross_vals = base_vals.div(terms_vals, axis = 'index')
                    cross_vals.columns = [cr + '.close']

                else:
                    if base == 'USD': non_USD = terms
                    if terms == 'USD': non_USD = base

                    correct_cr = self.fxconv.correct_notation(cr)

                    time_series_request.tickers = correct_cr
                    time_series_request.category = self.fxconv.em_or_g10(non_USD, freq)
                    cross_vals = time_series_factory.harvest_time_series(time_series_request)

                    # flip if not convention
                    if(correct_cr != cr):
                        cross_vals = 1 / cross_vals

                    cross_vals.columns = [cr + '.close']

            elif type[0:3] == "tot":
                if freq == 'daily':
                    # download base USD cross
                    time_series_request.tickers = base + 'USD'
                    time_series_request.category = self.fxconv.em_or_g10(base, freq) + '-tot'

                    if type == "tot":
                        base_vals = time_series_factory.harvest_time_series(time_series_request)
                    else:
                        x = 0

                    # download terms USD cross
                    time_series_request.tickers = terms + 'USD'
                    time_series_request.category = self.fxconv.em_or_g10(terms, freq) + '-tot'

                    if type == "tot":
                        terms_vals = time_series_factory.harvest_time_series(time_series_request)
                    else:
                        x = 0

                    base_rets = time_series_calcs.calculate_returns(base_vals)
                    terms_rets = time_series_calcs.calculate_returns(terms_vals)

                    cross_rets = base_rets.sub(terms_rets.iloc[:,0],axis=0)

                    # first return of a time series will be NaN, given we don't know the previous point
                    cross_rets.iloc[0] = 0

                    cross_vals = time_series_calcs.create_mult_index(cross_rets)
                    cross_vals.columns = [cr + '-tot.close']

                elif freq == 'intraday':
                    self.logger.info('Total calculated returns for intraday not implemented yet')
                    return None

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg

    # weekend filter for FX time series (wrapped as a helper method; the name is illustrative)
    def remove_fx_weekends(self, data_frame):
        # assume data_frame is in GMT time
        # remove Fri after 22:00 GMT
        # remove Sat
        # remove Sun before 19:00 GMT

        # Monday = 0, ..., Sunday = 6
        data_frame = data_frame.loc[~((data_frame.index.dayofweek == 4) & (data_frame.index.hour > 22))]
        data_frame = data_frame.loc[~(data_frame.index.dayofweek == 5)]
        data_frame = data_frame.loc[~((data_frame.index.dayofweek == 6) & (data_frame.index.hour < 19))]

        return data_frame
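
# A minimal sketch of the synthetic cross arithmetic used in get_fx_cross above:
# a non-USD cross is rebuilt from its two USD legs, eg. EURGBP = (EUR/USD) / (GBP/USD).
# The quotes below are purely illustrative.
if False:
    dates = pandas.date_range('2020-01-01', periods=3)

    eurusd = pandas.DataFrame({'temp': [1.10, 1.11, 1.12]}, index=dates)    # EUR/USD close
    gbpusd = pandas.DataFrame({'temp': [1.30, 1.29, 1.31]}, index=dates)    # GBP/USD close

    eurgbp = eurusd.div(gbpusd, axis='index')    # EURGBP = EURUSD / GBPUSD
    eurgbp.columns = ['EURGBP.close']

    print(eurgbp)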

# functions to test class
if __name__ == '__main__':

    logger = LoggerManager().getLogger(__name__)

    tsf = TimeSeriesFilter()

    if False:
        start = pandas.to_datetime('2000-01-01')
        end = pandas.to_datetime('2020-01-01')

        logger.info('Get FX holidays')
        hols = tsf.get_holidays(start, end, cal='FX')
        print(hols)

        logger.info('Get business days, excluding holidays')
        bus_days = tsf.create_calendar_bus_days(start, end, cal='FX')
        print(bus_days)
Example #36
class BBGLowLevelDaily(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # populate options for Bloomberg request for asset daily request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        # Process received events
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        data = defaultdict(dict)

        for i in range(fieldData.numValues()):
            # element 0 of each row is the date; elements 1 onwards are the requested fields
            for j in range(1, fieldData.getValue(i).numElements()):
                data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
                    = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if (not(data_frame.empty)):
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame
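
    # For illustration, the (field, ticker) -> {date: value} layout built above maps
    # directly into a DataFrame with one column per (field, ticker) pair, eg.:
    #
    #   data = defaultdict(dict)
    #   data[('PX_LAST', 'EURUSD Curncy')]['2015-01-02'] = 1.20
    #   data[('PX_LAST', 'EURUSD Curncy')]['2015-01-05'] = 1.19
    #   data_frame = pandas.DataFrame(data)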

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", self._options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request)
class StrategyFXCTA_Example(StrategyTemplate):

    def __init__(self):
        super(StrategyFXCTA_Example, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        ##### FILL IN WITH YOUR OWN PARAMETERS FOR display, dumping, TSF etc.
        self.tsfactory = LightTimeSeriesFactory()
        self.DUMP_CSV = 'output_data/'
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.FINAL_STRATEGY = 'Thalesians FX CTA'
        self.SCALE_FACTOR = 3
        
        return

    ###### Parameters and signal generations (need to be customised for every model)
    def fill_backtest_request(self):

        ##### FILL IN WITH YOUR OWN BACKTESTING PARAMETERS
        br = BacktestRequest()

        # get all asset data
        br.start_date = "04 Jan 1989"
        br.finish_date = datetime.datetime.utcnow()
        br.spot_tc_bp = 0.5
        br.ann_factor = 252

        br.plot_start = "01 Apr 2015"
        br.calc_stats = True
        br.write_csv = False
        br.plot_interim = True
        br.include_benchmark = True

        # have vol target for each signal
        br.signal_vol_adjust = True
        br.signal_vol_target = 0.1
        br.signal_vol_max_leverage = 5
        br.signal_vol_periods = 20
        br.signal_vol_obs_in_year = 252
        br.signal_vol_rebalance_freq = 'BM'
        br.signal_vol_resample_freq = None

        # have vol target for portfolio
        br.portfolio_vol_adjust = True
        br.portfolio_vol_target = 0.1
        br.portfolio_vol_max_leverage = 5
        br.portfolio_vol_periods = 20
        br.portfolio_vol_obs_in_year = 252
        br.portfolio_vol_rebalance_freq = 'BM'
        br.portfolio_vol_resample_freq = None

        # tech params
        br.tech_params.sma_period = 200

        return br
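
    # A rough sketch of the vol targeting these parameters drive (the actual
    # calculation lives in the backtesting engine; this is illustrative only):
    #
    #   realised_vol = returns.rolling(window=br.signal_vol_periods).std() \
    #       * math.sqrt(br.signal_vol_obs_in_year)
    #   leverage = (br.signal_vol_target / realised_vol) \
    #       .clip(upper=br.signal_vol_max_leverage)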

    def fill_assets(self):
        ##### FILL IN WITH YOUR ASSET DATA

        # for FX basket
        full_bkt    = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                       'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['Thalesians FX CTA'] = full_bkt

        br = self.fill_backtest_request()

        self.logger.info("Loading asset data...")

        vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                          'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

        time_series_request = TimeSeriesRequest(
                    start_date = br.start_date,                     # start date
                    finish_date = br.finish_date,                   # finish date
                    freq = 'daily',                                 # daily data
                    data_source = 'quandl',                         # use Quandl as data source
                    tickers = full_bkt,                             # ticker (Thalesians)
                    fields = ['close'],                                 # which fields to download
                    vendor_tickers = vendor_tickers,                    # ticker (Quandl)
                    vendor_fields = ['close'],                          # which Quandl fields to download
                    cache_algo = 'internet_load_return')                # how to return data

        asset_df = self.tsfactory.harvest_time_series(time_series_request)

        # signalling variables
        spot_df = asset_df
        spot_df2 = None

        return asset_df, spot_df, spot_df2, basket_dict

    def construct_signal(self, spot_df, spot_df2, tech_params, br):

        ##### FILL IN WITH YOUR OWN SIGNALS

        # use technical indicator to create signals
        # (we could obviously create whatever function we wanted for generating the signal dataframe)
        tech_ind = TechIndicator()
        tech_ind.create_tech_ind(spot_df, 'SMA', tech_params); signal_df = tech_ind.get_signal()

        return signal_df

    def construct_strategy_benchmark(self):

        ###### FILL IN WITH YOUR OWN BENCHMARK

        tsr_indices = TimeSeriesRequest(
            start_date = '01 Jan 1980',                     # start date
            finish_date = datetime.datetime.utcnow(),       # finish date
            freq = 'daily',                                 # daily data
            data_source = 'quandl',                         # use Quandl as data source
            tickers = ["EURUSD"],                           # tickers to download
            vendor_tickers=['FRED/DEXUSEU'],
            fields = ['close'],                             # which fields to download
            vendor_fields = ['close'],
            cache_algo = 'cache_algo_return')               # how to return data

        df = self.tsfactory.harvest_time_series(tsr_indices)

        df.columns = [x.split(".")[0] for x in df.columns]

        return df
Example #38
    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []
Example #39
 def __init__(self):
     super(LoaderPandasWeb, self).__init__()
     self.logger = LoggerManager().getLogger(__name__)
class TimeSeriesRequest:

    # properties
    #
    # data_source eg. bbg, yahoo, quandl
    # start_date
    # finish_date
    # tickers (can be list) eg. EURUSD
    # category (eg. fx, equities, fixed_income, cal_event, fundamental)
    # freq_mult (eg. 1)
    # freq
    # gran_freq (minute, daily, hourly, daily, weekly, monthly, yearly)
    # fields (can be list)
    # vendor_tickers (optional)
    # vendor_fields (optional)
    # cache_algo (eg. internet, disk, memory) - internet will forcibly download from the internet
    # environment (eg. prod, backtest) - old data is saved with prod, backtest will overwrite the last data point
    def __init__(self,
                 data_source=None,
                 start_date=None,
                 finish_date=None,
                 tickers=None,
                 category=None,
                 freq_mult=None,
                 freq=None,
                 gran_freq=None,
                 cut=None,
                 fields=None,
                 cache_algo=None,
                 vendor_tickers=None,
                 vendor_fields=None,
                 environment="backtest",
                 trade_side='trade'):

        self.logger = LoggerManager().getLogger(__name__)

        self.freq_mult = 1

        if data_source is not None: self.data_source = data_source
        if start_date is not None: self.start_date = start_date
        if finish_date is not None: self.finish_date = finish_date
        if tickers is not None: self.tickers = tickers
        if category is not None: self.category = category
        if gran_freq is not None: self.gran_freq = gran_freq
        if freq_mult is not None: self.freq_mult = freq_mult
        if freq is not None: self.freq = freq
        if cut is not None: self.cut = cut
        if fields is not None: self.fields = fields
        if cache_algo is not None: self.cache_algo = cache_algo
        if vendor_tickers is not None: self.vendor_tickers = vendor_tickers
        if vendor_fields is not None: self.vendor_fields = vendor_fields
        if environment is not None: self.environment = environment
        if trade_side is not None: self.trade_side = trade_side

    @property
    def data_source(self):
        return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        valid_data_source = [
            'ats', 'bloomberg', 'dukascopy', 'fred', 'gain', 'google',
            'quandl', 'yahoo'
        ]

        if not data_source in valid_data_source:
            self.logger.warning(data_source + " is not a defined data source.")

        self.__data_source = data_source

    @property
    def category(self):
        return self.__category

    @category.setter
    def category(self, category):
        self.__category = category

    @property
    def tickers(self):
        return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        if not isinstance(tickers, list):
            tickers = [tickers]

        self.__tickers = tickers

    @property
    def fields(self):
        return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if not field_entry in valid_fields:
                self.logger.warning(field_entry + " is not a valid field.")

        # add error checking

        self.__fields = fields

    @property
    def vendor_tickers(self):
        return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        if not isinstance(vendor_tickers, list):
            vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self):
        return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if not isinstance(vendor_fields, list):
            vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self):
        return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = ['tick', 'intraday', 'daily']

        if not freq in valid_freq:
            self.logger.warning(freq + " is not a defined frequency")

        self.__freq = freq

    @property
    def gran_freq(self):
        return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        gran_freq = gran_freq.lower()

        valid_gran_freq = [
            'tick', 'minute', 'hourly', 'pseudodaily', 'daily', 'weekly',
            'monthly', 'quarterly', 'yearly'
        ]

        if not gran_freq in valid_gran_freq:
            self.logger.warning(gran_freq + " is not a defined frequency")

        if gran_freq in ['minute', 'hourly']:
            self.__freq = 'intraday'
        elif gran_freq in ['tick']:
            self.__freq = 'tick'
        else:
            self.__freq = 'daily'

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self):
        return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult):
        self.__freq_mult = freq_mult

    @property
    def start_date(self):
        return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self):
        return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self):
        return self.__cut

    @cut.setter
    def cut(self, cut):
        self.__cut = cut

    def date_parser(self, date):
        if isinstance(date, str):
            # try several common formats in turn, eg. 'Jun 1 2005 01:33',
            # '1 Jun 2005 01:33', 'Jun 1 2005' and '1 Jun 2005'
            for fmt in ['%b %d %Y %H:%M', '%d %b %Y %H:%M', '%b %d %Y', '%d %b %Y']:
                try:
                    return datetime.strptime(date, fmt)
                except ValueError:
                    continue

        return date
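
    # For illustration, all of the following parse successfully:
    #
    #   req.date_parser('Jun 1 2005 01:33')    # '%b %d %Y %H:%M'
    #   req.date_parser('1 Jun 2005 01:33')    # '%d %b %Y %H:%M'
    #   req.date_parser('Jun 1 2005')          # '%b %d %Y'
    #   req.date_parser('1 Jun 2005')          # '%d %b %Y'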

    @property
    def cache_algo(self):
        return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = [
            'internet_load', 'internet_load_return', 'cache_algo',
            'cache_algo_return'
        ]

        if not cache_algo in valid_cache_algo:
            self.logger.warning(cache_algo +
                                " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self):
        return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment = ['prod', 'backtest']

        if not environment in valid_environment:
            self.logger.warning(environment + " is not a defined environment.")

        self.__environment = environment

    @property
    def trade_side(self):
        return self.__trade_side

    @trade_side.setter
    def trade_side(self, trade_side):
        trade_side = trade_side.lower()

        valid_trade_side = ['trade', 'bid', 'ask']

        if not trade_side in valid_trade_side:
            self.logger.warning(trade_side + " is not a defined trade side.")

        self.__trade_side = trade_side
 def __init__(self):
     self.logger = LoggerManager().getLogger(__name__)
Example #42
 def __init__(self):
     self.logger = LoggerManager().getLogger(__name__)
     self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
     self.scale_factor = 3
     return
from pythalesians.util.loggermanager import LoggerManager

# to download market data
from pythalesians.market.requests.timeseriesrequest import TimeSeriesRequest
from pythalesians.market.loaders.lighttimeseriesfactory import LightTimeSeriesFactory

# for plotting graphs
from pythalesians.graphics.graphs.plotfactory import PlotFactory
from pythalesians.graphics.graphs.graphproperties import GraphProperties

# for making elementary calculations on the time series
from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs
from datetime import timedelta

if True:
    logger = LoggerManager().getLogger(__name__)

    import datetime

    # just change "False" to "True" to run any of the below examples

    ###### download daily data from Bloomberg for USD/BRL and get biggest downmoves
    if True:
        tsc = TimeSeriesCalcs()

        time_series_request = TimeSeriesRequest(
            start_date="01 Jan 2005",  # start date
            finish_date=datetime.datetime.utcnow(),  # finish date
            freq='daily',  # daily data
            data_source='bloomberg',  # use Bloomberg as data source
            tickers=['USDBRL'],  # ticker (Thalesians)
Example #44
 def __init__(self):
     super(LoaderQuandl, self).__init__()
     self.logger = LoggerManager().getLogger(__name__)
class StrategyFXCTA_Example(StrategyTemplate):
    def __init__(self):
        super(StrategyFXCTA_Example, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        ##### FILL IN WITH YOUR OWN PARAMETERS FOR display, dumping, TSF etc.
        self.tsfactory = LightTimeSeriesFactory()
        self.DUMP_CSV = 'output_data/'
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime(
            "%Y%m%d") + ' '
        self.FINAL_STRATEGY = 'Thalesians FX CTA'
        self.SCALE_FACTOR = 1  #  specify plot size multiplier (should be larger on 4K monitors!)

        return

    ###### Parameters and signal generations (need to be customised for every model)
    def fill_backtest_request(self):

        ##### FILL IN WITH YOUR OWN BACKTESTING PARAMETERS
        br = BacktestRequest()

        # get all asset data
        br.start_date = "04 Jan 1989"  # start date of backtest
        br.finish_date = datetime.datetime.utcnow()  # end date of backtest
        br.spot_tc_bp = 0.5  # bid/ask spread in basis point
        br.ann_factor = 252  # number of points in year (working)

        br.plot_start = "01 Apr 2015"  # when to start plotting
        br.calc_stats = True  # add stats to legends of plots
        br.write_csv = False  # write CSV output
        br.plot_interim = True  # plot at various stages of process
        br.include_benchmark = True  # plot trading returns versus benchmark

        # have vol target for each signal
        br.signal_vol_adjust = True  # vol adjust weighting for asset vol
        br.signal_vol_target = 0.1  # 10% vol target for each asset
        br.signal_vol_max_leverage = 5  # maximum leverage of 5
        br.signal_vol_periods = 20  # calculate realised vol over 20 periods
        br.signal_vol_obs_in_year = 252  # number of periods in year
        br.signal_vol_rebalance_freq = 'BM'  # reweight at end of month
        br.signal_vol_resample_freq = None

        # have vol target for portfolio
        br.portfolio_vol_adjust = True  # vol adjust for portfolio
        br.portfolio_vol_target = 0.1  # portfolio vol target is 10%
        br.portfolio_vol_max_leverage = 5  # max leverage of 5
        br.portfolio_vol_periods = 20  # calculate realised vol over 20 periods
        br.portfolio_vol_obs_in_year = 252  # number of periods in year
        br.portfolio_vol_rebalance_freq = 'BM'  # reweight at end of month
        br.portfolio_vol_resample_freq = None

        # tech params
        br.tech_params.sma_period = 200  # use 200D SMA later

        return br

    def fill_assets(self):
        ##### FILL IN WITH YOUR ASSET DATA

        # for FX basket
        full_bkt = [
            'EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD', 'NZDUSD',
            'USDCHF', 'USDNOK', 'USDSEK'
        ]

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['Thalesians FX CTA'] = full_bkt

        br = self.fill_backtest_request()

        self.logger.info("Loading asset data...")

        vendor_tickers = [
            'FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL',
            'FRED/DEXCAUS', 'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS',
            'FRED/DEXSDUS'
        ]

        time_series_request = TimeSeriesRequest(
            start_date=br.start_date,  # start date
            finish_date=br.finish_date,  # finish date
            freq='daily',  # daily data
            data_source='quandl',  # use Quandl as data source
            tickers=full_bkt,  # ticker (Thalesians)
            fields=['close'],  # which fields to download
            vendor_tickers=vendor_tickers,  # ticker (Quandl)
            vendor_fields=['close'],  # which Quandl fields to download
            cache_algo='internet_load_return')  # how to return data

        asset_df = self.tsfactory.harvest_time_series(time_series_request)

        # signalling variables
        spot_df = asset_df
        spot_df2 = None

        return asset_df, spot_df, spot_df2, basket_dict

    def construct_signal(self, spot_df, spot_df2, tech_params, br):

        ##### FILL IN WITH YOUR OWN SIGNALS

        # use technical indicator to create signals
        # (we could obviously create whatever function we wanted for generating the signal dataframe)
        tech_ind = TechIndicator()
        tech_ind.create_tech_ind(spot_df, 'SMA', tech_params)
        signal_df = tech_ind.get_signal()

        return signal_df

    def construct_strategy_benchmark(self):

        ###### FILL IN WITH YOUR OWN BENCHMARK

        tsr_indices = TimeSeriesRequest(
            start_date='01 Jan 1980',  # start date
            finish_date=datetime.datetime.utcnow(),  # finish date
            freq='daily',  # daily data
            data_source='quandl',  # use Quandl as data source
            tickers=["EURUSD"],  # tickers to download
            vendor_tickers=['FRED/DEXUSEU'],
            fields=['close'],  # which fields to download
            vendor_fields=['close'],
            cache_algo='cache_algo_return')  # how to return data

        df = self.tsfactory.harvest_time_series(tsr_indices)

        df.columns = [x.split(".")[0] for x in df.columns]

        return df
Example #46
class LightTimeSeriesFactory:
    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use, e.g. "bloomberg", "quandl", "yahoo", "google", "fred"

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of the COM API (older) and the Open API (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader
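
    # e.g. get_loader('quandl') returns a LoaderQuandl instance; an unrecognised
    # source falls through every branch and returns None, so callers must be
    # prepared to handle a None loader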

    def harvest_time_series(self, time_series_request, kill_session=True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.data_source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(
                time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if ('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading...")

            # signal to loader template to exit session
            if loader is not None and kill_session: loader.kill_session()

        if (time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request,
                                              data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None
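
    # note: intraday requests are cached one ticker per key (the ticker is folded
    # into the hash key), whereas daily requests share a single key across all
    # their tickers - see download_daily below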

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(time_series_request.tickers) == 1 or \
                Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(
                    time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(
                                data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

                # key = self.create_category_key(time_series_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg
        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)

    def fetch_single_time_series(self, time_series_request):
        data_frame_single = self.get_loader(
            time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'
                data_frame_single = data_frame_single.astype('float32')

        return data_frame_single

    def fetch_group_time_series(self, time_series_request_list):

        data_frame_agg = None

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        if Constants().time_series_factory_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for the vendor (e.g. Bloomberg) to
            # return data, so threads usually suffice; the process-based path needs
            # multiprocessing_on_dill, as the standard pickle module cannot
            # serialise these objects correctly (note: currently not very stable)
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        if time_series_request_list[0].data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[
                time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series,
                                time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single,
                                                             how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg
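
    # multiprocessing.dummy.Pool exposes the multiprocessing.Pool interface but is
    # backed by threads, which suits these I/O-bound downloads; map_async(...).get()
    # blocks until every request in the list has returned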

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include the ticker in the key, as multiple tickers share the same file

        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(
                len(time_series_request.tickers) /
                Constants().time_series_factory_thread_no['other'] - 1)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = time_series_request.tickers[
                    i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(
                time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg
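
    # worked example of the grouping above: with 9 tickers and thread_no['other'] of 4,
    # group_size = int(9 / 4 - 1) = 1, so tickers are fetched one per request; with
    # 100 tickers it is int(100 / 4 - 1) = 24 tickers per request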

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        key = '.'.join([environment, category, source, freq, cut])

        if ticker is not None:
            key = key + '.' + ticker

        return key
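
    # e.g. a daily Quandl request for EURUSD in a hypothetical 'backtest' environment,
    # with no category or cut set on the request, keys as
    #   backtest.default-cat.quandl.daily.default-cut.EURUSD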

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
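
# A hypothetical usage sketch for LightTimeSeriesFactory, following the request
# conventions of the earlier examples (the dates and FRED vendor ticker here are
# illustrative assumptions):
if __name__ == '__main__':
    ltsf = LightTimeSeriesFactory()

    tsr = TimeSeriesRequest(
        start_date='01 Jan 2010', finish_date='01 Jan 2015',
        freq='daily', data_source='quandl',
        tickers=['EURUSD'], vendor_tickers=['FRED/DEXUSEU'],
        fields=['close'], vendor_fields=['close'],
        cache_algo='internet_load_return')

    print(ltsf.harvest_time_series(tsr).tail())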
Example #47
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self._techind = None
        self._signal = None