Example #1
class TwitterPyThalesians:
    def __init__(self, *args, **kwargs):
        self.logger = LoggerManager().getLogger(__name__)

    def set_key(self, APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET):
        self.twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN,
                               OAUTH_TOKEN_SECRET)

    def auto_set_key(self):
        self.twitter = Twython(Constants().APP_KEY,
                               Constants().APP_SECRET,
                               Constants().OAUTH_TOKEN,
                               Constants().OAUTH_TOKEN_SECRET)

    def update_status(self, msg, link=None, picture=None):
        # 22 chars URL
        # 23 chars picture

        chars_lim = 140

        if link is not None: chars_lim = chars_lim - (22 * link)
        if picture is not None: chars_lim = chars_lim - 23

        if (len(msg) > chars_lim):
            self.logger.info("Message too long for Twitter!")

        if picture is None:
            self.twitter.update_status(status=msg)
        else:
            photo = open(picture, 'rb')
            self.twitter.update_status_with_media(status=msg, media=photo)
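The character budget above (22 characters reserved per shortened link, 23 for an attached picture, against the historical 140-character limit) can be illustrated with a small standalone sketch; the function name and arguments here are hypothetical, not part of pythalesians:

# Hypothetical standalone sketch of the character-budget check used above.
# Assumes the historical 140-char limit, 22 chars per t.co link, 23 for media.
def chars_remaining(msg, num_links=0, has_picture=False, limit=140):
    budget = limit - (22 * num_links) - (23 if has_picture else 0)
    return budget - len(msg)

print(chars_remaining("Market wrap", num_links=1, has_picture=True))  # 84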
Example #2
class TwitterPyThalesians:

    def __init__(self, *args, **kwargs):
        self.logger = LoggerManager().getLogger(__name__)

    def set_key(self, APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET):
        self.twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)

    def auto_set_key(self):
        self.twitter = Twython(Constants().APP_KEY, Constants().APP_SECRET,
                               Constants().OAUTH_TOKEN, Constants().OAUTH_TOKEN_SECRET)

    def update_status(self, msg, link = None, picture = None):
        # 22 chars URL
        # 23 chars picture

        chars_lim = 140

        if link is not None: chars_lim = chars_lim - (22 * link)
        if picture is not None: chars_lim = chars_lim - 23

        if (len(msg) > chars_lim):
            self.logger.info("Message too long for Twitter!")

        if picture is None:
            self.twitter.update_status(status=msg)
        else:
            photo = open(picture, 'rb')
            self.twitter.update_status_with_media(status=msg, media=photo)
Example #3
class LoaderPandasWeb(LoaderTemplate):
    def __init__(self):
        super(LoaderPandasWeb, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        self.logger.info("Request Pandas Web data")

        data_frame = self.download_daily(time_series_request_vendor)
        data_frame = data_frame.to_frame().unstack()

        print(data_frame.tail())

        if data_frame is None or len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            ticker_requested = []

            for f in time_series_request.fields:
                for t in time_series_request.tickers:
                    ticker_requested.append(t + "." + f)

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

            # only return the requested tickers
            data_frame = pandas.DataFrame(data=data_frame[ticker_requested],
                                          index=data_frame.index,
                                          columns=ticker_requested)

        self.logger.info("Completed request from Pandas Web.")

        return data_frame

    def download_daily(self, time_series_request):
        return web.DataReader(time_series_request.tickers,
                              time_series_request.data_source,
                              time_series_request.start_date,
                              time_series_request.finish_date)
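For reference, the call that download_daily wraps is pandas-datareader's DataReader; a minimal sketch, assuming the pandas_datareader package is installed and using FRED as the data source:

import datetime
from pandas_datareader import data as web

start = datetime.date(2015, 1, 1)
end = datetime.date(2015, 6, 1)

# EUR/USD fixing from FRED (ticker and dates are illustrative)
data_frame = web.DataReader('DEXUSEU', 'fred', start, end)
print(data_frame.tail())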
Example #4
class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        if data_frame is None or len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

        if data_frame is not None:
            # tidy up tickers into a format that is more easily translatable
            returned_tickers = [x.replace(' - Value', '') for x in returned_tickers]
            returned_tickers = [x.replace('.', '/') for x in returned_tickers]

            fields = self.translate_from_vendor_field(['close' for x in returned_tickers], time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        return Quandl.get(time_series_request.tickers, authtoken=Constants().quandl_api_key, trim_start=time_series_request.start_date,
                          trim_end=time_series_request.finish_date)
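The Quandl.get call above uses the legacy Quandl package API (authtoken/trim_start/trim_end); a minimal usage sketch with a placeholder API key (the newer package is called quandl and uses quandl.get):

import Quandl

# placeholder key and illustrative dataset code
data_frame = Quandl.get('FRED/DEXUSEU',
                        authtoken='YOUR_QUANDL_API_KEY',
                        trim_start='2015-01-01',
                        trim_end='2015-06-01')
print(data_frame.tail())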
Example #5
class WebDataTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def download_raw_data(self):
        return

    @abc.abstractmethod
    def construct_indicator(self):
        return

    def dump_indicator(self):

        indicator_group = self.raw_indicator # self.raw_indicator.join(self.processed_indicator, how='outer')

        self.logger.info("About to write all web indicators")
        indicator_group.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y %H:%M:%S')
Example #6
class DataLoaderTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    def load_database(self, key = None):
        tsio = TimeSeriesIO()
        tsc = TimeSeriesCalcs()

        file = self._hdf5

        if key is not None:
            file = self._hdf5 + key + ".h5"

        # if cached file exists, use that, otherwise load CSV
        if os.path.isfile(file):
            self.logger.info("About to load market database from HDF5...")
            self.news_database = tsio.read_time_series_cache_from_disk(file)
            self.news_database = self.preprocess(self.news_database)
        else:
            self.logger.info("About to load market database from CSV...")
            self.news_database = self.load_csv()

        return self.news_database

    @abc.abstractmethod
    def load_csv(self):
        return

    def get_database(self, key):
        return self.news_database

    @abc.abstractmethod
    def preprocess(self, df):
        return
Example #7
class CreateDataIndexTemplate:

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def create_indicator(self):
        return

    @abc.abstractmethod
    def aggregate_news_data(self, raw_database):
        return

    @abc.abstractmethod
    def get_cached_aggregate(self):
        return

    def grab_indicator(self):
        return self.indicator

    def grab_econ_indicator(self):
        return self.indicator_econ

    def grab_final_indicator(self):
        return self.indicator_final

    def truncate_indicator(self, daily_ind, match):
        cols = daily_ind.columns.values

        to_include = []

        for i in range(0, len(cols)):
            if match in cols[i]:
                to_include.append(i)

        return daily_ind[daily_ind.columns[to_include]]

    def dump_indicators(self):
        tsf = TimeSeriesFilter()
        self.logger.info("About to write all indicators to CSV")
        self.indicator.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y')

        if (self._csv_econ_indicator_dump is not None):
            self.logger.info("About to write economy based indicators to CSV")
            self.indicator_econ.to_csv(self._csv_econ_indicator_dump, date_format='%d/%m/%Y')

        self.logger.info("About to write final indicators to CSV")

        # remove weekends and remove start of series
        if (self._csv_final_indicator_dump is not None):
            indicator_final_copy = tsf.filter_time_series_by_holidays(self.indicator_final, cal = 'WEEKDAY')
            indicator_final_copy = tsf.filter_time_series_by_date(
                start_date="01 Jan 2000", finish_date = None, data_frame=indicator_final_copy)

            indicator_final_copy.to_csv(self._csv_final_indicator_dump, date_format='%d/%m/%Y')
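Note that these template classes declare @abc.abstractmethod on plain classes, so Python will not actually prevent them from being instantiated; a minimal sketch of the enforced variant (illustrative class name, Python 3 syntax) would derive from abc.ABC:

import abc

class IndicatorTemplate(abc.ABC):          # hypothetical name, for illustration only
    @abc.abstractmethod
    def create_indicator(self):
        raise NotImplementedError

# IndicatorTemplate() now raises TypeError until create_indicator is overridden in a subclass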
Example #8
class DataLoaderTemplate:
    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    def load_database(self, key=None):
        tsio = TimeSeriesIO()
        tsc = TimeSeriesCalcs()

        file = self._hdf5

        if key is not None:
            file = self._hdf5 + key + ".h5"

        # if cached file exists, use that, otherwise load CSV
        if os.path.isfile(file):
            self.logger.info("About to load market database from HDF5...")
            self.news_database = tsio.read_time_series_cache_from_disk(file)
            self.news_database = self.preprocess(self.news_database)
        else:
            self.logger.info("About to load market database from CSV...")
            self.news_database = self.load_csv()

        return self.news_database

    @abc.abstractmethod
    def load_csv(self):
        return

    def get_database(self, key):
        return self.news_database

    @abc.abstractmethod
    def preprocess(self, df):
        return
Example #9
                Constants.time_series_factory_thread_technique = tech

                for no in thread_no:
                    for key in Constants.time_series_factory_thread_no:
                        Constants.time_series_factory_thread_no[key] = no

                    import time
                    start = time.time()
                    df = ltsf.harvest_time_series(time_series_request)
                    end = time.time()
                    duration = end - start

                    diag.append("With " + str(no) + " " + tech + " no: " + str(duration) + " seconds")

            for d in diag:
                logger.info(d)

        ###### download intraday data from Bloomberg for FX, with different threading techniques
        if True:

            from datetime import timedelta

            time_series_request = TimeSeriesRequest(
                    start_date = datetime.date.today() - timedelta(days=10),    # start date
                    finish_date = datetime.date.today(),                        # finish date
                    freq = 'intraday',                                          # intraday data
                    data_source = 'bloomberg',                      # use Bloomberg as data source
                    tickers = ['EURUSD',                            # ticker (Thalesians)
                               'GBPUSD',
                               'USDJPY',
                               'AUDUSD'],
Example #10
class BBGLowLevelRef(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        data[(field_name, ticker)][index] = re.findall(
                            r'"(.*?)"', "%s" % row)[0]

                        index = index + 1
                # else:
                # vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                # print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")
                print(errorInfo.getElementAsString("category"), ":", \
                    fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if (not (data_frame.empty)):
            data_frame.columns = pandas.MultiIndex.from_tuples(
                data, names=['field', 'ticker'])
            self.logger.info("Reading: " + ticker + ' ' +
                             str(data_frame.index[0]) + ' - ' +
                             str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
                not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'START_DT',
                          self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request)
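process_message above accumulates values in a dict keyed by (field, ticker) tuples and then rebuilds the columns as a MultiIndex; a self-contained sketch of that pandas pattern with made-up values:

import collections
import pandas

data = collections.defaultdict(dict)
data[('PX_LAST', 'EURUSD Curncy')][0] = 1.10     # made-up reference values
data[('PX_LAST', 'GBPUSD Curncy')][0] = 1.55

data_frame = pandas.DataFrame(data)
data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
print(data_frame)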
Example #11
class TimeSeriesIO:
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self,
                                   fname,
                                   sheet,
                                   data_frame,
                                   create_new=False):
        """
        write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if (create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                book = load_workbook(fname)
                # appending to an existing workbook requires the openpyxl engine
                # (xlsxwriter cannot modify an existing file)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self,
                              f_name,
                              excel_sheet,
                              freq,
                              cutoff=None,
                              dateparse=None,
                              postfix='.close',
                              intraday_tz='UTC'):

        return self.read_csv_data_frame(f_name,
                                        freq,
                                        cutoff=cutoff,
                                        dateparse=dateparse,
                                        postfix=postfix,
                                        intraday_tz=intraday_tz,
                                        excel_sheet=excel_sheet)

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame):
        """
        write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        """

        store = pandas.HDFStore(self.get_h5_filename(fname),
                                complib="blosc",
                                complevel=9)

        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        store['data'] = data_frame
        store.close()

    def get_h5_filename(self, fname):
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            # convert to a plain list so it can be concatenated with the date/time column names below
            fields = list(data_frame32.columns.values)

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[[
            'Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'
        ] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname):
        """
        read_time_series_cache_from_disk - Reads time series cache from disk

        Parameters
        ----------
        fname : str
            path of the file to be read

        Returns
        -------
        DataFrame
        """

        if os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self,
                            f_name,
                            freq,
                            cutoff=None,
                            dateparse=None,
                            postfix='.close',
                            intraday_tz='UTC',
                            excel_sheet=None):
        if (freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(
                    int,
                    [x[6:10], x[3:5], x[0:2], x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(
                    int,
                    [x[0:4], x[5:7], x[8:10], x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name,
                                             index_col=0,
                                             parse_dates=True,
                                             date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name,
                                               excel_sheet,
                                               index_col=0,
                                               na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates='coerce')

            else:
                if excel_sheet is None:
                    data_frame = pandas.read_csv(f_name,
                                                 index_col=0,
                                                 parse_dates=["DATE"],
                                                 date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name,
                                                   excel_sheet,
                                                   index_col=0,
                                                   na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                        int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if (freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def convert_csv_data_frame(self,
                               f_name,
                               category,
                               freq,
                               cutoff=None,
                               dateparse=None):
        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name,
                                              freq,
                                              cutoff=None,
                                              dateparse=None)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
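The HDF5 cache round trip used by write_time_series_cache_to_disk and read_time_series_cache_from_disk boils down to pandas.HDFStore; a self-contained sketch (requires the PyTables package; the filename is a placeholder):

import pandas

df = pandas.DataFrame({'EURUSD.close': [1.10, 1.11, 1.09]},
                      index=pandas.date_range('2015-01-05', periods=3))

store = pandas.HDFStore('example_cache.h5', complib='blosc', complevel=9)
store['data'] = df
store.close()

store = pandas.HDFStore('example_cache.h5')
cached = store.select('data')
store.close()

print(cached)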
Example #12
class IntradayBarRequest(Request):
    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """
        Intraday bar request for bbg

        Parameters
        ----------
        symbol : string
        interval : number of minutes
        start : start date
        end : end date (if None then use today)
        event : (TRADE,BID,ASK,BEST_BID,BEST_ASK)

        """

        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        if start is None:
            start = datetime.today() - timedelta(30)

        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related
        self.response = defaultdict(list)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):

        # create the bbg request object
        start, end = self.start, self.end
        request = svc.CreateRequest('IntradayBarRequest')
        request.Set('security', self.symbol)
        request.Set('interval', self.interval)
        request.Set('eventType', self.event)
        request.Set(
            'startDateTime',
            session.CreateDatetime(start.year, start.month, start.day,
                                   start.hour, start.minute))
        request.Set(
            'endDateTime',
            session.CreateDatetime(end.year, end.month, end.day, end.hour,
                                   end.minute))

        self.logger.info("Fetching intraday data for " + str(self.symbol) +
                         " from " + start.strftime('%d/%m/%Y') + " to " +
                         end.strftime('%d/%m/%Y'))

        return request

    def on_event(self, evt, is_final):
        """
        on_event - invoked in response to COM PumpWaitingMessages, on a different thread

        """

        response = self.response

        self.logger.debug("Receiving data from Bloomberg...")

        for msg in XmlHelper.message_iter(evt):
            bars = msg.GetElement('barData').GetElement('barTickData')

            self.logger.debug("Read message...")

            for i in range(bars.NumValues):
                bar = bars.GetValue(i)
                ts = bar.GetElement(0).Value

                dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)

                response['time'].append(dt)
                response['open'].append(bar.GetElement(1).Value)
                response['high'].append(bar.GetElement(2).Value)
                response['low'].append(bar.GetElement(3).Value)
                response['close'].append(bar.GetElement(4).Value)
                response['volume'].append(bar.GetElement(5).Value)
                response['events'].append(bar.GetElement(6).Value)

                if (i % 20000 == 0):
                    dt_str = dt.strftime('%d/%m/%Y')
                    self.logger.debug("Processing " + dt_str)

        self.logger.debug("Finished processing for ticker.")

        if is_final:
            idx = response.pop('time')
            self.response = DataFrame(
                response,
                columns=['open', 'high', 'low', 'close', 'volume', 'events'],
                index=idx)
            self.response.index.name = 'Date'
            self.response = self.response.astype('float32')
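The final step of on_event, turning the accumulated response lists into a time-indexed frame, can be reproduced in isolation with made-up bar values:

from collections import defaultdict
from datetime import datetime
from pandas import DataFrame

response = defaultdict(list)
for minute, px in enumerate([1.100, 1.102, 1.101]):          # made-up bars
    response['time'].append(datetime(2015, 1, 5, 9, minute))
    response['open'].append(px)
    response['high'].append(px + 0.001)
    response['low'].append(px - 0.001)
    response['close'].append(px)
    response['volume'].append(1000.0)
    response['events'].append(25.0)

idx = response.pop('time')
bars = DataFrame(response, columns=['open', 'high', 'low', 'close', 'volume', 'events'], index=idx)
bars.index.name = 'Date'
print(bars.astype('float32'))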
Example #13
        return ConfigManager._dict_time_series_fields_list_vendor_to_library[
            source + '.' + sourcefield]

    @staticmethod
    def convert_library_to_vendor_field(source, field):
        return ConfigManager._dict_time_series_fields_list_library_to_vendor[
            source + '.' + field]


## test function
if __name__ == '__main__':
    logger = LoggerManager().getLogger(__name__)

    categories = ConfigManager().get_categories_from_fields()

    logger.info("Categories from fields list")
    print(categories)

    categories = ConfigManager().get_categories_from_tickers()

    logger.info("Categories from tickers list")
    print(categories)

    filter = 'events'

    categories_filtered = ConfigManager(
    ).get_categories_from_tickers_selective_filter(filter)
    logger.info("Categories from tickers list, filtered by events")
    print(categories_filtered)

    logger.info("For each category, print all tickers and fields")
Example #14
    # have vol target for each signal
    br.signal_vol_adjust = True
    br.signal_vol_target = 0.05
    br.signal_vol_max_leverage = 3
    br.signal_vol_periods = 60
    br.signal_vol_obs_in_year = 252
    br.signal_vol_rebalance_freq = 'BM'
    br.signal_vol_resample_freq = None

    tech_params = TechParams()
    tech_params.sma_period = 200
    indicator = 'SMA'

    # pick USD crosses in G10 FX
    # note: we are calculating returns from spot (it is much better to use to total return
    # indices for FX, which include carry)
    logger.info("Loading asset data...")

    tickers = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
               'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

    vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                      'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

    time_series_request = TimeSeriesRequest(
                start_date = "01 Jan 1989",                     # start date
                finish_date = datetime.date.today(),            # finish date
                freq = 'daily',                                 # daily data
                data_source = 'quandl',                         # use Quandl as data source
                tickers = tickers,                              # ticker (Thalesians)
                fields = ['close'],                                 # which fields to download
                vendor_tickers = vendor_tickers,                    # ticker (Quandl)
Example #15
class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        if data_frame is None or len(data_frame.index) == 0: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

        if data_frame is not None:
            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-') for x in returned_tickers]
            returned_fields = [x.replace('value', 'close') for x in returned_fields]    # special case for close

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0

        data_frame = None

        while(trials < 5):
            try:
                data_frame = Quandl.get(time_series_request.tickers, authtoken=Constants().quandl_api_key, trim_start=time_series_request.start_date,
                            trim_end=time_series_request.finish_date)

                break
            except:
                trials = trials + 1
                self.logger.info("Attempting... " + str(trials) + " request to download from Quandl")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
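The retry loop in download_daily can be factored into a generic helper; a sketch (the names here are hypothetical, not part of pythalesians):

import time

def call_with_retries(fn, max_trials=5, logger=None, pause=1.0):
    """Call fn() until it succeeds or max_trials attempts have failed."""
    for trial in range(1, max_trials + 1):
        try:
            return fn()
        except Exception as e:
            if logger is not None:
                logger.info("Attempt " + str(trial) + " failed: " + str(e))
            time.sleep(pause)          # simple fixed pause between attempts

    raise RuntimeError("Download failed after " + str(max_trials) + " attempts")

# e.g. call_with_retries(lambda: Quandl.get('FRED/DEXUSEU', authtoken='YOUR_QUANDL_API_KEY'))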
Example #16
        return ConfigManager._dict_time_series_fields_list_vendor_to_library[
            source + '.' + sourcefield]

    @staticmethod
    def convert_library_to_vendor_field(source, field):
        return ConfigManager._dict_time_series_fields_list_library_to_vendor[
            source + '.' + field]


## test function
if __name__ == '__main__':
    logger = LoggerManager().getLogger(__name__)

    categories = ConfigManager().get_categories_from_fields()

    logger.info("Categories from fields list")
    print(categories)

    categories = ConfigManager().get_categories_from_tickers()

    logger.info("Categories from tickers list")
    print(categories)

    filter = 'events'

    categories_filtered = ConfigManager().get_categories_from_tickers_selective_filter(filter)
    logger.info("Categories from tickers list, filtered by events")
    print(categories_filtered)

    logger.info("For each category, print all tickers and fields")
Example #17
class HistoricalDataRequest(Request):

    def __init__(self, symbols, fields, start=None, end=None, period='DAILY', addtl_sets=None, ignore_security_error=0, ignore_field_error=0):
        """ Historical data request for bbg.

        Parameters
        ----------
        symbols : string or list
        fields : string or list
        start : start date (if None then use 1 year ago)
        end : end date (if None then use today)
        period : ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')
        ignore_field_error : bool
        ignore_security_error : bool

        """

        Request.__init__(self, ignore_security_error=ignore_security_error, ignore_field_error=ignore_field_error)

        assert period in ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')
        self.symbols = isinstance(symbols, str) and [symbols] or symbols
        self.fields = isinstance(fields, str) and [fields] or fields

        if start is None:
            start = datetime.today() - timedelta(365)   # by default download the past year
        if end is None:
            end = datetime.today()

        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.period = period

        self.logger = LoggerManager().getLogger(__name__)

        # response related
        self.response = {}

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        request = svc.CreateRequest('HistoricalDataRequest')
        [request.GetElement('securities').AppendValue(sec) for sec in self.symbols]
        [request.GetElement('fields').AppendValue(fld) for fld in self.fields]
        request.Set('startDate', self.start.strftime('%Y%m%d'))
        request.Set('endDate', self.end.strftime('%Y%m%d'))
        request.Set('periodicitySelection', self.period)

        o = request.GetElement('overrides').AppendElment()
        o.SetElement('fieldId', 'TIME_ZONE_OVERRIDE')
        o.SetElement('value', 'GMT')

        return request

    def on_security_data_node(self, node):
        """ process a securityData node - FIXME: currently not handling relateDate node """
        sid = XmlHelper.get_child_value(node, 'security')
        farr = node.GetElement('fieldData')
        dmap = defaultdict(list)

        self.logger.info("Fetching ticker " + sid)

        for i in range(farr.NumValues):
            pt = farr.GetValue(i)
            [dmap[f].append(XmlHelper.get_child_value(pt, f)) for f in ['date'] + self.fields]

        self.logger.info("Returning ticker " + sid)

        idx = dmap.pop('date')
        frame = DataFrame(dmap, columns=self.fields, index=idx)
        frame.index.name = 'date'
        self.response[sid] = frame

    def on_event(self, evt, is_final):
        """
        on_event - invoked in response to COM PumpWaitingMessages, on a different thread

        """

        for msg in XmlHelper.message_iter(evt):
            # Single security element in historical request
            node = msg.GetElement('securityData')
            if node.HasElement('securityError'):
                self.security_errors.append(XmlHelper.as_security_error(node.GetElement('securityError')))
            else:
                self.on_security_data_node(node)

    def response_as_single(self, copy=0):
        """
        response_as_single - convert the response map to a single data frame with Multi-Index columns

        """

        arr = []

        for sid, frame in self.response.items():
            if copy:
                frame = frame.copy()
            if 'security' not in frame:
                frame.insert(0, 'security', sid)
            arr.append(frame.reset_index().set_index(['date', 'security']))

        # time.sleep(1000)
        if (arr == []): return arr

        return concat(arr).unstack()

    def response_as_panel(self, swap=False):
        panel = Panel(self.response)
        if swap:
            panel = panel.swapaxes('items', 'minor')
        return panel
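response_as_single stacks one frame per security into a single frame with MultiIndex columns; the core pandas pattern, shown on made-up data:

from pandas import DataFrame, concat

response = {
    'EURUSD Curncy': DataFrame({'PX_LAST': [1.10, 1.11]}, index=['2015-01-05', '2015-01-06']),
    'GBPUSD Curncy': DataFrame({'PX_LAST': [1.55, 1.56]}, index=['2015-01-05', '2015-01-06']),
}

arr = []

for sid, frame in response.items():
    frame = frame.copy()
    frame.insert(0, 'security', sid)
    frame.index.name = 'date'
    arr.append(frame.reset_index().set_index(['date', 'security']))

print(concat(arr).unstack())    # columns become (field, security) pairs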
Example #18
class IntradayBarRequest(Request):

    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """
        Intraday bar request for bbg

        Parameters
        ----------
        symbol : string
        interval : number of minutes
        start : start date
        end : end date (if None then use today)
        event : (TRADE,BID,ASK,BEST_BID,BEST_ASK)

        """

        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        if start is None:
            start = datetime.today() - timedelta(30)

        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related
        self.response = defaultdict(list)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):

        # create the bbg request object
        start, end = self.start, self.end
        request = svc.CreateRequest('IntradayBarRequest')
        request.Set('security', self.symbol)
        request.Set('interval', self.interval)
        request.Set('eventType', self.event)
        request.Set('startDateTime', session.CreateDatetime(start.year, start.month, start.day, start.hour, start.minute))
        request.Set('endDateTime', session.CreateDatetime(end.year, end.month, end.day, end.hour, end.minute))

        self.logger.info("Fetching intraday data for " + str(self.symbol) + " from "
                         + start.strftime('%d/%m/%Y') + " to " + end.strftime('%d/%m/%Y'))

        return request

    def on_event(self, evt, is_final):
        """
        on_event - invoked in response to COM PumpWaitingMessages, on a different thread

        """

        response = self.response

        self.logger.debug("Receiving data from Bloomberg...")

        for msg in XmlHelper.message_iter(evt):
            bars = msg.GetElement('barData').GetElement('barTickData')

            self.logger.debug("Read message...")

            for i in range(bars.NumValues):
                bar = bars.GetValue(i)
                ts = bar.GetElement(0).Value

                dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)

                response['time'].append(dt)
                response['open'].append(bar.GetElement(1).Value)
                response['high'].append(bar.GetElement(2).Value)
                response['low'].append(bar.GetElement(3).Value)
                response['close'].append(bar.GetElement(4).Value)
                response['volume'].append(bar.GetElement(5).Value)
                response['events'].append(bar.GetElement(6).Value)

                if (i % 20000 == 0):
                    dt_str = dt.strftime('%d/%m/%Y')
                    self.logger.debug("Processing " + dt_str)

        self.logger.debug("Finished processing for ticker.")

        if is_final:
            idx = response.pop('time')
            self.response = DataFrame(response, columns=['open', 'high', 'low', 'close', 'volume', 'events'],
                                      index=idx)
            self.response.index.name = 'Date'
            self.response = self.response.astype('float32')
Example #19
    br.ann_factor = 252

    # have vol target for each signal
    br.signal_vol_adjust = True
    br.signal_vol_target = 0.05
    br.signal_vol_max_leverage = 3
    br.signal_vol_periods = 60
    br.signal_vol_obs_in_year = 252
    br.signal_vol_rebalance_freq = 'BM'

    tech_params = TechParams()
    tech_params.sma_period = 200
    indicator = 'SMA'

    # pick USD crosses in G10 FX
    # note: we are calculating returns from spot (it is much better to use to total return
    # indices for FX, which include carry)
    logger.info("Loading asset data...")

    tickers = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
               'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

    vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                      'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

    time_series_request = TimeSeriesRequest(
                start_date = "01 Jan 1989",                     # start date
                finish_date = datetime.date.today(),            # finish date
                freq = 'daily',                                 # daily data
                data_source = 'quandl',                         # use Quandl as data source
                tickers = tickers,                              # ticker (Thalesians)
                fields = ['close'],                                 # which fields to download
                vendor_tickers = vendor_tickers,                    # ticker (Quandl)
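The signal_vol_* parameters above configure volatility targeting of the trading signal; the sketch below is only a rough illustration of what such scaling typically does (synthetic returns, not pythalesians' actual implementation):

import numpy
import pandas

returns = pandas.Series(numpy.random.normal(0, 0.006, 500))     # synthetic daily returns

vol_target = 0.05        # cf. br.signal_vol_target
max_leverage = 3         # cf. br.signal_vol_max_leverage
realised_vol = returns.rolling(60).std() * numpy.sqrt(252)      # 60-day window, 252 obs/year

leverage = (vol_target / realised_vol).clip(upper=max_leverage)
scaled_returns = leverage.shift(1) * returns    # apply yesterday's leverage to today's return
print(scaled_returns.std() * numpy.sqrt(252))   # roughly near the 5% target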
Example #20
class CreateDataIndexTemplate:
    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    @abc.abstractmethod
    def create_indicator(self):
        return

    @abc.abstractmethod
    def aggregate_news_data(self, raw_database):
        return

    @abc.abstractmethod
    def get_cached_aggregate(self):
        return

    def grab_indicator(self):
        return self.indicator

    def grab_econ_indicator(self):
        return self.indicator_econ

    def grab_final_indicator(self):
        return self.indicator_final

    def truncate_indicator(self, daily_ind, match):
        cols = daily_ind.columns.values

        to_include = []

        for i in range(0, len(cols)):
            if match in cols[i]:
                to_include.append(i)

        return daily_ind[daily_ind.columns[to_include]]

    def dump_indicators(self):
        tsf = TimeSeriesFilter()
        self.logger.info("About to write all indicators to CSV")
        self.indicator.to_csv(self._csv_indicator_dump, date_format='%d/%m/%Y')

        if (self._csv_econ_indicator_dump is not None):
            self.logger.info("About to write economy based indicators to CSV")
            self.indicator_econ.to_csv(self._csv_econ_indicator_dump,
                                       date_format='%d/%m/%Y')

        self.logger.info("About to write final indicators to CSV")

        # remove weekends and remove start of series
        if (self._csv_final_indicator_dump is not None):
            indicator_final_copy = tsf.filter_time_series_by_holidays(
                self.indicator_final, cal='WEEKDAY')
            indicator_final_copy = tsf.filter_time_series_by_date(
                start_date="01 Jan 2000",
                finish_date=None,
                data_frame=indicator_final_copy)

            indicator_final_copy.to_csv(self._csv_final_indicator_dump,
                                        date_format='%d/%m/%Y')
Example #21
class BBGLowLevelTick(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelTick, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.TICK_DATA = blpapi.Name("tickData")
        self.COND_CODE = blpapi.Name("conditionCodes")
        self.TICK_SIZE = blpapi.Name("size")
        self.TIME = blpapi.Name("time")
        self.TYPE = blpapi.Name("type")
        self.VALUE = blpapi.Name("value")
        self.RESPONSE_ERROR = blpapi.Name("responseError")
        self.CATEGORY = blpapi.Name("category")
        self.MESSAGE = blpapi.Name("message")
        self.SESSION_TERMINATED = blpapi.Name("SessionTerminated")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers[
            0]  # get 1st ticker only!
        self._options.event = time_series_request.trade_side.upper()
        # self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        # self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(
                microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(
                microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)

        self.logger.info("Processing tick data for " +
                         str(self._options.security))
        tuple = []

        data_vals = data.values()

        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""
        #
        #     # each price time point has multiple fields - marginally quicker
        #     tuple.append(([item.getElementAsFloat(self.VALUE),
        #                     item.getElementAsInteger(self.TICK_SIZE)],
        #                     item.getElementAsDatetime(self.TIME)))

        # slightly faster this way (note, we are skipping trade & CC fields)
        tuple = [([
            item.getElementAsFloat(self.VALUE),
            item.getElementAsInteger(self.TICK_SIZE)
        ], item.getElementAsDatetime(self.TIME)) for item in data_vals]

        data_table = list(map(itemgetter(0), tuple))
        time_list = list(map(itemgetter(1), tuple))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " +
                             str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data=data_table,
                                index=time_list,
                                columns=['close', 'ticksize'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        self.logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request)
Example #22
class BBGLowLevelIntraday(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()
        self._options.security = time_series_request.tickers[
            0]  # get 1st ticker only!
        self._options.event = "TRADE"
        self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(
                microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(
                microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        self.logger.info("Processing intraday data for " +
                         str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pandas.DataFrame(data = data_matrix, index = date_index,
        #                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## for loop method is touch slower
        # time_list = []
        # data_table = []

        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                  bar.getElementAsFloat(self.HIGH),
        #                  bar.getElementAsFloat(self.LOW),
        #                  bar.getElementAsFloat(self.CLOSE),
        #                  bar.getElementAsInteger(self.VOLUME),
        #                  bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # each price time point has multiple fields - marginally quicker
        tuple = [([
            bar.getElementAsFloat(self.OPEN),
            bar.getElementAsFloat(self.HIGH),
            bar.getElementAsFloat(self.LOW),
            bar.getElementAsFloat(self.CLOSE),
            bar.getElementAsInteger(self.VOLUME),
            bar.getElementAsInteger(self.NUM_EVENTS)
        ], bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), tuple))
        time_list = list(map(itemgetter(1), tuple))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " +
                             str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(
            data=data_table,
            index=time_list,
            columns=['open', 'high', 'low', 'close', 'volume', 'events'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.set("eventType", self._options.event)
        request.set("interval", self._options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        if self._options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request)
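The itemgetter idiom used in process_message splits a list of (values, timestamp) pairs into a data table and a matching time index; a self-contained sketch with made-up ticks:

from datetime import datetime
from operator import itemgetter
import pandas

pairs = [([1.10, 2], datetime(2015, 1, 5, 9, 0, 0)),     # ([value, ticksize], time)
         ([1.11, 1], datetime(2015, 1, 5, 9, 0, 1))]

data_table = list(map(itemgetter(0), pairs))
time_list = list(map(itemgetter(1), pairs))

print(pandas.DataFrame(data=data_table, index=time_list, columns=['close', 'ticksize']))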
Example #23
class TimeSeriesIO:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """
        write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if(create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                # appending to an existing workbook requires the openpyxl engine
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff = None, dateparse = None,
                            postfix = '.close', intraday_tz = 'UTC'):

        return self.read_csv_data_frame(f_name, freq, cutoff = cutoff, dateparse = dateparse,
                            postfix = postfix, intraday_tz = intraday_tz, excel_sheet = excel_sheet)

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame):
        """
        write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        """

        store = pandas.HDFStore(self.get_h5_filename(fname), complib="blosc", complevel=9)

        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        store['data'] = data_frame
        store.close()

    def get_h5_filename(self, fname):
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields = None):
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = list(data_frame32.columns)

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname):
        """
        read_time_series_cache_from_disk - Reads time series cache from disk

        Parameters
        ----------
        fname : str
            file to be read from

        Returns
        -------
        DataFrame
        """

        if os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff = None, dateparse = None,
                            postfix = '.close', intraday_tz = 'UTC', excel_sheet = None):
        if(freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col = 0, parse_dates = True, date_parser = dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col = 0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates = 'coerce')

            else:
                if excel_sheet is None:
                    data_frame = pandas.read_csv(f_name, index_col=0, parse_dates =["DATE"], date_parser = dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col = 0, na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                        int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if(freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(
            category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        with codecs.open (f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count( '\x00' ):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
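
# A hedged usage sketch for the TimeSeriesIO class above (the file name below is
# illustrative only; LoggerManager/Constants from the library are assumed importable).
import datetime

import pandas

tsio = TimeSeriesIO()

df = pandas.DataFrame({'EURUSD.close': [1.10, 1.11]},
                      index=[datetime.datetime(2016, 1, 1), datetime.datetime(2016, 1, 2)])

# written to eurusd_daily_example.h5, then read back from the same cache file
tsio.write_time_series_cache_to_disk('eurusd_daily_example', df)
print(tsio.read_time_series_cache_from_disk('eurusd_daily_example'))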
Beispiel #24
0
class LoaderBBG(LoaderTemplate):
    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # event release times/dates need a separate ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(
                    time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index(
                    'release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(
                        time_series_request, time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pandas.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request,
                                                 time_series_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[
                0]

            data_frame = self.download_intraday(time_series_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    self.logger.info("No tickers returned for: " +
                                     time_series_request_vendor.tickers)

                    return None

                cols = data_frame.columns.values
                data_frame = data_frame.tz_localize('UTC')
                cols = time_series_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(time_series_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor,
                           time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " +
                          time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            data_frame = data_frame.convert_objects(convert_dates='coerce',
                                                    convert_numeric='coerce')

        return data_frame

    # abstract methods to be implemented by concrete subclasses
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
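
# A hypothetical sketch (not from the library) of a concrete subclass filling in the
# abstract methods declared on LoaderBBG above; a real implementation would delegate
# to the low-level Bloomberg request classes.
class LoaderBBGExampleStub(LoaderBBG):
    def kill_session(self):
        pass         # close any open Bloomberg session here

    def download_intraday(self, time_series_request):
        return None  # would return an intraday bar DataFrame

    def download_daily(self, time_series_request):
        return None  # would return a daily HistoricalDataRequest DataFrame

    def download_ref(self, time_series_request):
        return None  # would return a ReferenceDataRequest DataFrame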
Beispiel #25
0
class BBGLowLevelRef(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        data[(field_name, ticker)][index] = re.findall(r'"(.*?)"', "%s" % row)[0]

                        index = index + 1
                # else:
                    # vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                    # print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")
                print(errorInfo.getElementAsString("category"), ":", \
                    fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if (not(data_frame.empty)):
            data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            self.logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)    # force GMT time
        self.add_override(request, 'START_DT', self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT', self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request)
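
# A small self-contained sketch (dummy frames, not library code) of what combine_slices
# above does: a slice whose ticker is not yet present in the aggregated frame is
# attached with an outer join on the index, otherwise the slice is ignored.
import pandas

cols_a = pandas.MultiIndex.from_tuples([('PX_LAST', 'EURUSD Curncy')], names=['field', 'ticker'])
cols_b = pandas.MultiIndex.from_tuples([('PX_LAST', 'GBPUSD Curncy')], names=['field', 'ticker'])

existing = pandas.DataFrame([[1.10]], index=[0], columns=cols_a)
new_slice = pandas.DataFrame([[1.45]], index=[0], columns=cols_b)

if (new_slice.columns.get_level_values(1).values[0]
        not in existing.columns.get_level_values(1).values):
    existing = existing.join(new_slice, how='outer')

print(existing)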
Beispiel #26
0
class TimeSeriesIO:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """
        write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if(create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                # appending to an existing workbook requires the openpyxl engine
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """
        write_time_series_to_excel_writer - writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff = None, dateparse = None,
                            postfix = '.close', intraday_tz = 'UTC'):
        """
        read_excel_data_frame - Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        excel_sheet : str
            Excel sheet to be read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str (optional)
            timezone of file if it uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff = cutoff, dateparse = dateparse,
                            postfix = postfix, intraday_tz = intraday_tz, excel_sheet = excel_sheet)

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame, use_bcolz = False):
        """
        write_time_series_cache_to_disk - writes Pandas data frame to disk as HDF5 format or bcolz format

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        use_bcolz : boolean
            if True write a bcolz store rather than HDF5
        """

        if (use_bcolz):
            # convert characters that bcolz can't handle into safe substitute characters
            data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        else:
            h5_filename_temp = self.get_h5_filename(fname + ".temp")
            h5_filename = self.get_h5_filename(fname)

            # delete any stale temp copy left over from a previous write
            try:
                os.remove(h5_filename_temp)
            except: pass

            store = pandas.HDFStore(h5_filename_temp, complib="blosc", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store['data'] = data_frame
            store.close()

            # delete the old copy
            try:
                os.remove(h5_filename)
            except: pass

            # once written to disk rename
            os.rename(h5_filename_temp, h5_filename)

    def get_h5_filename(self, fname):
        """
        get_h5_filename - Ensures the filename ends with an .h5 extension, appending it if missing

        Parameters
        ----------
        fname : str
            filename to check

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """
        get_bcolz_filename - Ensures the filename ends with a .bcolz extension, appending it if missing

        Parameters
        ----------
        fname : str
            filename to check

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields = None):
        """
        write_r_compatible_hdf_dataframe - Write a DataFrame to disk in as an R compatible HDF5 file

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = list(data_frame32.columns)

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname, use_bcolz = False):
        """
        read_time_series_cache_from_disk - Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str
            file to be read from
        use_bcolz : boolean
            if True read from a bcolz store rather than HDF5

        Returns
        -------
        DataFrame
        """

        if (use_bcolz):
            try:
                name = self.get_bcolz_filename(fname)
                zlens = bcolz.open(rootdir=name)
                data_frame = zlens.todataframe()

                data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                data_frame.index.name = 'Date'
                del data_frame['DTS_']

                # convert the substitute characters back into the original (more readable) characters for pandas
                data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                data_frame.columns = [x[2:] for x in data_frame.columns]

                return data_frame
            except:
                return None

        elif os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff = None, dateparse = None,
                            postfix = '.close', intraday_tz = 'UTC', excel_sheet = None):
        """
        read_csv_data_frame - Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str (optional)
            timezone of file if it uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if(freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col = 0, parse_dates = True, date_parser = dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col = 0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates = 'coerce')

            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates =["DATE"], date_parser = dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates =["Date"], date_parser = dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col = 0, na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                        int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if(freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):

        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        """
        convert_csv_data_frame - Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(
            category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """
        clean_csv_file - Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open (f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count( '\x00' ):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
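
# A hedged usage sketch of the bcolz path added to TimeSeriesIO above (file name is
# illustrative; assumes bcolz plus the module-level _invalid_chars/_replace_chars
# lists used by find_replace_chars are available).
import datetime

import pandas

tsio = TimeSeriesIO()

df = pandas.DataFrame({'EURUSD.close': [1.10, 1.11]},
                      index=pandas.DatetimeIndex([datetime.datetime(2016, 1, 1),
                                                  datetime.datetime(2016, 1, 2)]))

tsio.write_time_series_cache_to_disk('eurusd_bcolz_example', df, use_bcolz=True)
print(tsio.read_time_series_cache_from_disk('eurusd_bcolz_example', use_bcolz=True))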
Beispiel #27
0
                for no in thread_no:
                    for key in Constants.time_series_factory_thread_no:
                        Constants.time_series_factory_thread_no[key] = no

                    import time
                    start = time.time()
                    df = ltsf.harvest_time_series(time_series_request)
                    end = time.time()
                    duration = end - start

                    diag.append("With " + str(no) + " " + tech + " no: " +
                                str(duration) + " seconds")

            for d in diag:
                logger.info(d)

        ###### download intraday data from Bloomberg for FX, with different threading techniques
        if True:

            from datetime import timedelta

            time_series_request = TimeSeriesRequest(
                start_date=datetime.date.today() -
                timedelta(days=10),  # start date
                finish_date=datetime.date.today(),  # finish date
                freq='intraday',  # intraday data
                data_source='bloomberg',  # use Bloomberg as data source
                tickers=[
                    'EURUSD',  # ticker (Thalesians)
                    'GBPUSD',
Beispiel #28
0
class LoaderQuandl(LoaderTemplate):
    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        if data_frame is None or data_frame.empty: return None

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

        if data_frame is not None:
            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-')
                               for x in returned_tickers]
            returned_fields = [
                x.replace('value', 'close') for x in returned_fields
            ]  # special case for close

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0

        data_frame = None

        while (trials < 5):
            try:
                data_frame = Quandl.get(
                    time_series_request.tickers,
                    authtoken=Constants().quandl_api_key,
                    trim_start=time_series_request.start_date,
                    trim_end=time_series_request.finish_date)

                break
            except:
                trials = trials + 1
                self.logger.info("Attempting... " + str(trials) +
                                 " request to download from Quandl")

        if trials == 5:
            self.logger.error(
                "Couldn't download from Quandl after several attempts!")

        return data_frame
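
# A generic sketch (assumption: not library code) of the bounded-retry pattern that
# download_daily uses above: attempt an unreliable call up to max_trials times, then
# give up and return None.
def fetch_with_retries(fetch, max_trials=5):
    for trial in range(1, max_trials + 1):
        try:
            return fetch()
        except Exception:
            print("Attempt " + str(trial) + " failed, retrying...")

    print("Giving up after " + str(max_trials) + " attempts")
    return None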
Beispiel #29
0
class TradeAnalysis:
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime(
            "%Y%m%d") + ' '
        self.scale_factor = 3
        return

    def run_strategy_returns_stats(self, strategy):
        """
        run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        strategy : StrategyTemplate
            defining trading strategy

        """

        pnl = strategy.get_strategy_pnl()
        tz = TimeSeriesTimezone()
        tsc = TimeSeriesCalcs()

        # PyFolio assumes UTC time based DataFrames (so force this localisation)
        try:
            pnl = tz.localise_index_as_UTC(pnl)
        except:
            pass

        # set the matplotlib style sheet & defaults
        try:
            matplotlib.rcdefaults()
            plt.style.use(Constants().
                          plotfactory_pythalesians_style_sheet['pythalesians'])
        except:
            pass

        # TODO for intraday strategies, make daily

        # convert DataFrame (assumed to have only one column) to Series
        pnl = tsc.calculate_returns(pnl)
        pnl = pnl[pnl.columns[0]]

        fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

        try:
            plt.savefig(strategy.DUMP_PATH + "stats.png")
        except:
            pass

        plt.show()

    def run_tc_shock(self, strategy, tc=None):
        if tc is None: tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp': x} for x in tc]
        pretty_portfolio_names = [str(x) + 'bp'
                                  for x in tc]  # names of the portfolio
        parameter_type = 'TC analysis'  # broad type of parameter name

        return self.run_arbitrary_sensitivity(
            strategy,
            parameter_list=parameter_list,
            pretty_portfolio_names=pretty_portfolio_names,
            parameter_type=parameter_type)

    ###### Parameters and signal generations (need to be customised for every model)
    def run_arbitrary_sensitivity(self,
                                  strat,
                                  parameter_list=None,
                                  parameter_names=None,
                                  pretty_portfolio_names=None,
                                  parameter_type=None):

        asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()

        port_list = None
        tsd_list = []

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.br = br  # for calculating signals

            signal_df = strat.construct_signal(spot_df, spot_df2,
                                               br.tech_params, br)

            cash_backtest = CashBacktest()
            self.logger.info("Calculating... " + pretty_portfolio_names[i])

            cash_backtest.calculate_trading_PnL(br, asset_df, signal_df)
            tsd_list.append(cash_backtest.get_portfolio_pnl_tsd())
            stats = str(cash_backtest.get_portfolio_pnl_desc()[0])

            port = cash_backtest.get_cumportfolio().resample('B')
            port.columns = [pretty_portfolio_names[i] + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()

        pf = PlotFactory()
        gp = GraphProperties()

        ir = [t.inforatio()[0] for t in tsd_list]

        # gp.color = 'Blues'
        # plot all the variations
        gp.resample = 'B'
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + '.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type
        pf.plot_line_graph(port_list, adapter='pythalesians', gp=gp)

        # plot all the IR in a bar chart form (can be easier to read!)
        gp = GraphProperties()
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type
        summary = pandas.DataFrame(index=pretty_portfolio_names,
                                   data=ir,
                                   columns=['IR'])
        pf.plot_bar_graph(summary, adapter='pythalesians', gp=gp)

        return port_list

    ###### Parameters and signal generations (need to be customised for every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self,
                                             strat,
                                             parameter_list=None,
                                             pretty_portfolio_names=None,
                                             strip=None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = strat.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[
                i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])
            strat.br = br
            strat.construct_strategy(br=br)

            strat.plot_strategy_pnl()
            strat.plot_strategy_leverage()
            strat.plot_strategy_group_benchmark_pnl(strip=strip)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()
        strat.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, strat):
        from pythalesians.economics.seasonality.seasonality import Seasonality
        from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

        tsc = TimeSeriesCalcs()
        seas = Seasonality()
        strat.construct_strategy()
        pnl = strat.get_strategy_pnl()

        # get seasonality by day of the month
        pnl = pnl.resample('B')
        rets = tsc.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average=True)

        # get seasonality by month
        pnl = pnl.resample('BM')
        rets = tsc.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        gp = GraphProperties()
        pf = PlotFactory()

        # Plotting spot over day of month/month of year
        gp.color = 'Blues'
        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality day of month.png'
        gp.title = strat.FINAL_STRATEGY + ' day of month seasonality'
        gp.display_legend = False
        gp.color_2_series = [bus_day.columns[-1]]
        gp.color_2 = ['red']  # red, pink
        gp.linewidth_2 = 4
        gp.linewidth_2_series = [bus_day.columns[-1]]
        gp.y_axis_2_series = [bus_day.columns[-1]]

        pf.plot_line_graph(bus_day, adapter='pythalesians', gp=gp)

        gp = GraphProperties()

        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality month of year.png'
        gp.title = strat.FINAL_STRATEGY + ' month of year seasonality'

        pf.plot_line_graph(month, adapter='pythalesians', gp=gp)

        return month
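
# A hedged usage sketch for TradeAnalysis above: 'my_strategy' is a hypothetical
# StrategyTemplate-style object that must provide fill_assets, fill_backtest_request,
# construct_signal, construct_strategy etc., so the calls are left commented out.
ta = TradeAnalysis()

# shock spot transaction costs (in bp) and compare the resulting portfolios
# port_list = ta.run_tc_shock(my_strategy, tc=[0, 0.5, 1.0, 2.0])

# seasonality of the strategy P&L by business day of month and by month of year
# month = ta.run_day_of_month_analysis(my_strategy)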
Beispiel #30
0
class FXCrossFactory:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        if Constants().default_time_series_factory == 'lighttimeseriesfactory':
            self.time_series_factory = LightTimeSeriesFactory()
        else:
            self.time_series_factory = CachedTimeSeriesFactory()
        return

    def get_fx_cross_tick(self, start, end, cross,
                     cut = "NYC", source = "gain", cache_algo='cache_algo_return', type = 'spot'):

        if isinstance(cross, str):
            cross = [cross]

        time_series_request = TimeSeriesRequest()
        time_series_factory = self.time_series_factory
        data_frame_agg = None

        time_series_request.gran_freq = "tick"                  # tick

        time_series_request.freq_mult = 1                       # 1 min
        time_series_request.cut = cut                           # NYC/BGN ticker
        time_series_request.fields = ['bid', 'ask']             # bid/ask field only
        time_series_request.cache_algo = cache_algo             # cache_algo_only, cache_algo_return, internet_load

        time_series_request.environment = 'backtest'
        time_series_request.start_date = start
        time_series_request.finish_date = end
        time_series_request.data_source = source

        time_series_request.category = 'fx'

        for cr in cross:

            if (type == 'spot'):
                time_series_request.tickers = cr

                cross_vals = time_series_factory.harvest_time_series(time_series_request)
                cross_vals.columns = [cr + '.bid', cr + '.ask']

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg


    def get_fx_cross(self, start, end, cross,
                     cut = "NYC", source = "bloomberg", freq = "intraday", cache_algo='cache_algo_return', type = 'spot'):

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start, end, cross,
                     cut = cut, source = source, cache_algo='cache_algo_return', type = 'spot')

        if isinstance(cross, str):
            cross = [cross]

        time_series_request = TimeSeriesRequest()
        time_series_factory = self.time_series_factory
        time_series_calcs = TimeSeriesCalcs()
        data_frame_agg = None

        if freq == 'intraday':
            time_series_request.gran_freq = "minute"                # intraday

        elif freq == 'daily':
            time_series_request.gran_freq = "daily"                 # intraday

        time_series_request.freq_mult = 1                       # 1 min
        time_series_request.cut = cut                           # NYC/BGN ticker
        time_series_request.fields = 'close'                    # close field only
        time_series_request.cache_algo = cache_algo             # cache_algo_only, cache_algo_return, internet_load

        time_series_request.environment = 'backtest'
        time_series_request.start_date = start
        time_series_request.finish_date = end
        time_series_request.data_source = source

        for cr in cross:
            base = cr[0:3]
            terms = cr[3:6]

            if (type == 'spot'):
                # non-USD crosses
                if base != 'USD' and terms != 'USD':
                    base_USD = self.fxconv.correct_notation('USD' + base)
                    terms_USD = self.fxconv.correct_notation('USD' + terms)

                    # TODO check if the cross exists in the database

                    # download base USD cross
                    time_series_request.tickers = base_USD
                    time_series_request.category = self.fxconv.em_or_g10(base, freq)
                    base_vals = time_series_factory.harvest_time_series(time_series_request)

                    # download terms USD cross
                    time_series_request.tickers = terms_USD
                    time_series_request.category = self.fxconv.em_or_g10(terms, freq)
                    terms_vals = time_series_factory.harvest_time_series(time_series_request)

                    if (base_USD[0:3] == 'USD'):
                        base_vals = 1 / base_vals
                    if (terms_USD[0:3] == 'USD'):
                        terms_vals = 1 / terms_vals

                    base_vals.columns = ['temp']
                    terms_vals.columns = ['temp']
                    cross_vals = base_vals.div(terms_vals, axis = 'index')
                    cross_vals.columns = [cr + '.close']

                else:
                    if base == 'USD': non_USD = terms
                    if terms == 'USD': non_USD = base

                    correct_cr = self.fxconv.correct_notation(cr)

                    time_series_request.tickers = correct_cr
                    time_series_request.category = self.fxconv.em_or_g10(non_USD, freq)
                    cross_vals = time_series_factory.harvest_time_series(time_series_request)

                    # flip if not convention
                    if(correct_cr != cr):
                        cross_vals = 1 / cross_vals

                    cross_vals.columns = [cr + '.close']

            elif type[0:3] == "tot":
                if freq == 'daily':
                    # download base USD cross
                    time_series_request.tickers = base + 'USD'
                    time_series_request.category = self.fxconv.em_or_g10(base, freq) + '-tot'

                    if type == "tot":
                        base_vals = time_series_factory.harvest_time_series(time_series_request)
                    else:
                        x = 0

                    # download terms USD cross
                    time_series_request.tickers = terms + 'USD'
                    time_series_request.category = self.fxconv.em_or_g10(terms, freq) + '-tot'

                    if type == "tot":
                        terms_vals = time_series_factory.harvest_time_series(time_series_request)
                    else:
                        x = 0

                    base_rets = time_series_calcs.calculate_returns(base_vals)
                    terms_rets = time_series_calcs.calculate_returns(terms_vals)

                    cross_rets = base_rets.sub(terms_rets.iloc[:,0],axis=0)

                    # first return of a time series will be NaN, given we don't know the previous point
                    cross_rets.iloc[0] = 0

                    cross_vals = time_series_calcs.create_mult_index(cross_rets)
                    cross_vals.columns = [cr + '-tot.close']

                elif freq == 'intraday':
                    self.logger.info('Total calculated returns for intraday not implemented yet')
                    return None

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg
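
# A tiny numeric sketch (dummy quotes, not library code) of the non-USD cross
# construction used in get_fx_cross above: express both legs against USD, then divide,
# e.g. EURGBP = EURUSD / GBPUSD.
import pandas

eurusd = pandas.Series([1.10, 1.12])
gbpusd = pandas.Series([1.45, 1.40])

eurgbp = eurusd.div(gbpusd)
print(eurgbp)   # approximately 0.7586 and 0.8000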
Beispiel #31
0
class BBGLowLevelTick(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelTick, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.TICK_DATA = blpapi.Name("tickData")
        self.COND_CODE = blpapi.Name("conditionCodes")
        self.TICK_SIZE = blpapi.Name("size")
        self.TIME = blpapi.Name("time")
        self.TYPE = blpapi.Name("type")
        self.VALUE = blpapi.Name("value")
        self.RESPONSE_ERROR = blpapi.Name("responseError")
        self.CATEGORY = blpapi.Name("category")
        self.MESSAGE = blpapi.Name("message")
        self.SESSION_TERMINATED = blpapi.Name("SessionTerminated")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers[0]  # get 1st ticker only!
        self._options.event = time_series_request.trade_side.upper()
        # self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        # self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, "microsecond"):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, "microsecond"):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)

        self.logger.info("Processing tick data for " + str(self._options.security))
        tick_tuples = []

        data_vals = data.values()

        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""
        #
        #     # each price time point has multiple fields - marginally quicker
        #     tuple.append(([item.getElementAsFloat(self.VALUE),
        #                     item.getElementAsInteger(self.TICK_SIZE)],
        #                     item.getElementAsDatetime(self.TIME)))

        # slightly faster this way (note, we are skipping trade & CC fields)
        tick_tuples = [
            (
                [item.getElementAsFloat(self.VALUE), item.getElementAsInteger(self.TICK_SIZE)],
                item.getElementAsDatetime(self.TIME),
            )
            for item in data_vals
        ]

        data_table = list(map(itemgetter(0), tick_tuples))
        time_list = list(map(itemgetter(1), tick_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data=data_table, index=time_list, columns=["close", "ticksize"])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        self.logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request)
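
# A minimal sketch (not library code) of the datetime clean-up that fill_options above
# performs before sending a tick request: microseconds are stripped from the start and
# end datetimes.
import datetime

start = datetime.datetime(2016, 1, 1, 9, 30, 15, 123456)

if hasattr(start, "microsecond"):
    start = start.replace(microsecond=0)

print(start)   # 2016-01-01 09:30:15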
Beispiel #32
0
class HistoricalDataRequest(Request):
    def __init__(self,
                 symbols,
                 fields,
                 start=None,
                 end=None,
                 period='DAILY',
                 addtl_sets=None,
                 ignore_security_error=0,
                 ignore_field_error=0):
        """ Historical data request for bbg.

        Parameters
        ----------
        symbols : string or list
        fields : string or list
        start : start date (if None then use 1 year ago)
        end : end date (if None then use today)
        period : ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'SEMI-ANNUAL', 'YEARLY')
        ignore_field_errors : bool
        ignore_security_errors : bool

        """

        Request.__init__(self,
                         ignore_security_error=ignore_security_error,
                         ignore_field_error=ignore_field_error)

        assert period in ('DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY',
                          'SEMI-ANNUAL', 'YEARLY')
        self.symbols = [symbols] if isinstance(symbols, str) else symbols
        self.fields = [fields] if isinstance(fields, str) else fields

        if start is None:
            start = datetime.today() - timedelta(
                365)  # by default download the past year
        if end is None:
            end = datetime.today()

        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.period = period

        self.logger = LoggerManager().getLogger(__name__)

        # response related
        self.response = {}

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        request = svc.CreateRequest('HistoricalDataRequest')
        for sec in self.symbols:
            request.GetElement('securities').AppendValue(sec)

        for fld in self.fields:
            request.GetElement('fields').AppendValue(fld)
        request.Set('startDate', self.start.strftime('%Y%m%d'))
        request.Set('endDate', self.end.strftime('%Y%m%d'))
        request.Set('periodicitySelection', self.period)

        o = request.GetElement('overrides').AppendElment()
        o.SetElement('fieldId', 'TIME_ZONE_OVERRIDE')
        o.SetElement('value', 'GMT')

        return request

    def on_security_data_node(self, node):
        """ process a securityData node - FIXME: currently not handling relateDate node """
        sid = XmlHelper.get_child_value(node, 'security')
        farr = node.GetElement('fieldData')
        dmap = defaultdict(list)

        self.logger.info("Fetching ticker " + sid)

        for i in range(farr.NumValues):
            pt = farr.GetValue(i)
            for f in ['date'] + self.fields:
                dmap[f].append(XmlHelper.get_child_value(pt, f))

        self.logger.info("Returning ticker " + sid)

        idx = dmap.pop('date')
        frame = DataFrame(dmap, columns=self.fields, index=idx)
        frame.index.name = 'date'
        self.response[sid] = frame

    def on_event(self, evt, is_final):
        """
        on_event - invoked in response to COM PumpWaitingMessages (on a different thread)

        """

        for msg in XmlHelper.message_iter(evt):
            # Single security element in historical request
            node = msg.GetElement('securityData')
            if node.HasElement('securityError'):
                self.security_errors.append(
                    XmlHelper.as_security_error(
                        node.GetElement('securityError')))
            else:
                self.on_security_data_node(node)

    def response_as_single(self, copy=0):
        """
        response_as_single - convert the response map to a single data frame with Multi-Index columns

        """

        arr = []

        for sid, frame in self.response.items():
            if copy:
                frame = frame.copy()
            if 'security' not in frame:
                frame.insert(0, 'security', sid)
            arr.append(frame.reset_index().set_index(['date', 'security']))

        # time.sleep(1000)
        if (arr == []): return arr

        return concat(arr).unstack()

    def response_as_panel(self, swap=False):
        panel = Panel(self.response)
        if swap:
            panel = panel.swapaxes('items', 'minor')
        return panel
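
A minimal usage sketch for the request class above, assuming the surrounding Request/XmlHelper plumbing and a live Bloomberg session are available; the tickers and fields are illustrative only:

# hypothetical driver code; the event loop that fills req.response is provided elsewhere
req = HistoricalDataRequest(['IBM US Equity', 'MSFT US Equity'],
                            ['PX_LAST', 'PX_VOLUME'],
                            start='2015-01-01', end='2015-12-31',
                            period='DAILY')

# ... run the Bloomberg event loop so on_event() populates req.response ...

combined = req.response_as_single()    # single frame, unstacked on the security level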
Example #33
0
class BBGLowLevelDaily(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
                not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # populate options for Bloomberg request for asset daily request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        # Process received events
        ticker = msg.getElement('securityData').getElement(
            'security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        data = defaultdict(dict)

        for i in range(fieldData.numValues()):
            for j in range(1, fieldData.getValue(i).numElements()):
                data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
                    = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pandas.DataFrame(data)

        # an obsolete ticker may return no values
        if not data_frame.empty:
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' +
                             str(data_frame.index[0]) + ' - ' +
                             str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate",
                    self._options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", self._options.endDateTime.strftime('%Y%m%d'))

        # multiple fields and securities can be appended to a single HistoricalDataRequest
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request)
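
Note that the data dict built in process_message above is keyed by (field, ticker) tuples with dates as the inner keys, so pandas.DataFrame(data) produces two-level columns, which is what combine_slices relies on. A hedged illustration with made-up values:

# data[('PX_LAST', 'EURUSD Curncy')]['2015-03-02'] = 1.1183
#
# after pandas.DataFrame(data) and pandas.to_datetime on the index:
#
#                   PX_LAST
#             EURUSD Curncy
# 2015-03-02         1.1183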
Example #34
0
class BBGLowLevelDaily(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # populate options for Bloomberg request for asset daily request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        # Process received events
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        data = defaultdict(dict)

        for i in range(fieldData.numValues()):
            for j in range(1, fieldData.getValue(i).numElements()):
                data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
                    = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pandas.DataFrame(data)

        # an obsolete ticker may return no values
        if not data_frame.empty:
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", self._options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", self._options.endDateTime.strftime('%Y%m%d'))

        # multiple fields and securities can be appended to a single HistoricalDataRequest
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request)
class LoaderPandasWeb(LoaderTemplate):
    def __init__(self):
        super(LoaderPandasWeb, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Pandas Web data")

        data_frame = self.download_daily(time_series_request_vendor)

        if time_series_request_vendor.data_source == "fred":
            returned_fields = ["close" for x in data_frame.columns.values]
            returned_tickers = data_frame.columns.values
        else:
            data_frame = data_frame.to_frame().unstack()

            # print(data_frame.tail())

            if len(data_frame.index) == 0:
                return None

            # convert from vendor to Thalesians tickers/fields
            if data_frame is not None:
                returned_fields = data_frame.columns.get_level_values(0)
                returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            ticker_requested = []

            for f in time_series_request.fields:
                for t in time_series_request.tickers:
                    ticker_requested.append(t + "." + f)

            data_frame.columns = ticker_combined
            data_frame.index.name = "Date"

            # only return the requested tickers
            data_frame = pandas.DataFrame(
                data=data_frame[ticker_requested], index=data_frame.index, columns=ticker_requested
            )

        self.logger.info("Completed request from Pandas Web.")

        return data_frame

    def download_daily(self, time_series_request):
        return web.DataReader(
            time_series_request.tickers,
            time_series_request.data_source,
            time_series_request.start_date,
            time_series_request.finish_date,
        )
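
A hedged usage sketch for the loader above; the TimeSeriesRequest arguments mirror those used elsewhere in this listing, and the FRED vendor ticker is an assumption:

# hypothetical driver, assuming the pythalesians request plumbing is importable
tsr = TimeSeriesRequest(start_date='01 Jan 2015', finish_date='01 Jan 2016',
                        freq='daily', data_source='fred',
                        tickers=['EURUSD'], fields=['close'],
                        vendor_tickers=['DEXUSEU'], vendor_fields=['close'],
                        cache_algo='internet_load_return')

df = LoaderPandasWeb().load_ticker(tsr)    # columns come back renamed to 'EURUSD.close'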
Example #36
0
class BBGLowLevelIntraday(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers[0]    # get 1st ticker only!
        self._options.event = "TRADE"
        self._options.barInterval = time_series_request.freq_mult
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through the Bloomberg output, creating a DataFrame
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        self.logger.info("Processing intraday data for " + str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pandas.DataFrame(data = data_matrix, index = date_index,
        #                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## for-loop method is a touch slower
        # time_list = []
        # data_table = []

        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                  bar.getElementAsFloat(self.HIGH),
        #                  bar.getElementAsFloat(self.LOW),
        #                  bar.getElementAsFloat(self.CLOSE),
        #                  bar.getElementAsInteger(self.VOLUME),
        #                  bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # each price time point has multiple fields - marginally quicker
        tuple = [([bar.getElementAsFloat(self.OPEN),
                        bar.getElementAsFloat(self.HIGH),
                        bar.getElementAsFloat(self.LOW),
                        bar.getElementAsFloat(self.CLOSE),
                        bar.getElementAsInteger(self.VOLUME),
                        bar.getElementAsInteger(self.NUM_EVENTS)],
                        bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), tuple))
        time_list = list(map(itemgetter(1), tuple))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except IndexError:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data = data_table, index = time_list,
                      columns=['open', 'high', 'low', 'close', 'volume', 'events'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.set("eventType", self._options.event)
        request.set("interval", self._options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        if self._options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request)
class LoaderDukasCopy(LoaderTemplate):
    tick_name  = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Dukascopy)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly', 'intraday', 'minute', 'hourly']):
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            # time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.get_tick(time_series_request, time_series_request_vendor)

            if data_frame is not None: data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        return

    def get_tick(self, time_series_request, time_series_request_vendor):

        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]] * (len(returned_fields))

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):

        symbol = time_series_request.tickers[0]
        df_list = []

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [self.fetch_file(time, symbol) for time in
                   self.hour_range(time_series_request.start_date, time_series_request.finish_date)]

        # parallel (has pickle issues)
        # time_list = self.hour_range(time_series_request.start_date, time_series_request.finish_date)
        # df_list = Parallel(n_jobs=-1)(delayed(self.fetch_file)(time, symbol) for time in time_list)

        try:
            return pandas.concat(df_list)
        except:
            return None

    def fetch_file(self, time, symbol):
        if time.hour % 24 == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(
                symbol = symbol,
                year = str(time.year).rjust(4, '0'),
                month = str(time.month).rjust(2, '0'),
                day = str(time.day).rjust(2, '0'),
                hour = str(time.hour).rjust(2, '0')
            )

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants().temp_pythalesians_folder + "/dkticks/" + tick_path

            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except:
            return None

    def fetch_tick(self, tick_url):
        i = 0
        tick_request = None

        # try up to 5 times to download
        while i < 5:
            try:
                tick_request = requests.get(tick_url)
                i = 5
            except:
                i = i + 1

        if (tick_request is None):
            self.logger("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        data_file = open(out_path, "wb+")
        data_file.write(content)
        data_file.close()

    def chunks(self, list, n):
        if n < 1:
            n = 1
        return [list[i:i + n] for i in range(0, len(list), n)]

    def retrieve_df(self, data, symbol, epoch):
        date, tuple = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(data, epoch)

        df = pandas.DataFrame(data = tuple, columns=['temp', 'bid', 'ask', 'bidv', 'askv'], index = date)
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without decimal point
        df['bid'] =  df['bid'] /  divisor
        df['ask'] =  df['ask'] / divisor
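        # worked example (made-up raw values): a raw bid of 112345 comes back as
        # 1.12345 for EURUSD (divisor 100000), but as 112.345 for USDJPY (divisor 1000)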

        return df

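    # e.g. hour_range(start, start + timedelta(hours=3)) yields start, start + 1h
    # and start + 2h (the end hour itself is excluded)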
    def hour_range(self, start_date, end_date):
        delta_t = end_date - start_date

        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)

        for n in range(int(delta_hours)):
            yield start_date + timedelta(hours=n)

    def get_daily_data(self):
        pass
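
A hedged usage sketch for the Dukascopy loader above; only tick frequency is supported, the ticker and dates are illustrative, and the request plumbing is assumed from the surrounding examples:

tsr = TimeSeriesRequest(start_date='02 Mar 2015', finish_date='03 Mar 2015',
                        freq='tick', data_source='dukascopy',
                        tickers=['EURUSD'], fields=['bid', 'ask'],
                        vendor_tickers=['EURUSD'], vendor_fields=['bid', 'ask'],
                        cache_algo='internet_load_return')

tick_df = LoaderDukasCopy().load_ticker(tsr)    # UTC-indexed bid/ask ticks, or None on failure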
Example #38
0
class TradeAnalysis:

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.scale_factor = 3
        return

    def run_strategy_returns_stats(self, strategy):
        """
        run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        strategy : StrategyTemplate
            defining trading strategy

        """

        pnl = strategy.get_strategy_pnl()
        tz = TimeSeriesTimezone()
        tsc = TimeSeriesCalcs()

        # PyFolio assumes UTC time based DataFrames (so force this localisation)
        try:
            pnl = tz.localise_index_as_UTC(pnl)
        except: pass

        # set the matplotlib style sheet & defaults
        try:
            matplotlib.rcdefaults()
            plt.style.use(Constants().plotfactory_pythalesians_style_sheet['pythalesians'])
        except: pass

        # TODO for intraday strategies, make daily

        # convert DataFrame (assumed to have only one column) to Series
        pnl = tsc.calculate_returns(pnl)
        pnl = pnl[pnl.columns[0]]

        fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

        try:
            plt.savefig(strategy.DUMP_PATH + "stats.png")
        except: pass

        plt.show()

    def run_tc_shock(self, strategy, tc = None):
        if tc is None: tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp' : x } for x in tc]
        pretty_portfolio_names = [str(x) + 'bp' for x in tc]    # names of the portfolio
        parameter_type = 'TC analysis'                          # broad type of parameter name

        return self.run_arbitrary_sensitivity(strategy,
                                 parameter_list=parameter_list,
                                 pretty_portfolio_names=pretty_portfolio_names,
                                 parameter_type=parameter_type)

    ###### Parameters and signal generations (need to be customised for every model)
    def run_arbitrary_sensitivity(self, strat, parameter_list = None, parameter_names = None,
                                  pretty_portfolio_names = None, parameter_type = None):

        asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()

        port_list = None
        tsd_list = []

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.br = br   # for calculating signals

            signal_df = strat.construct_signal(spot_df, spot_df2, br.tech_params, br)

            cash_backtest = CashBacktest()
            self.logger.info("Calculating... " + pretty_portfolio_names[i])

            cash_backtest.calculate_trading_PnL(br, asset_df, signal_df)
            tsd_list.append(cash_backtest.get_portfolio_pnl_tsd())
            stats = str(cash_backtest.get_portfolio_pnl_desc()[0])

            port = cash_backtest.get_cumportfolio().resample('B').mean()
            port.columns = [pretty_portfolio_names[i] + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()

        pf = PlotFactory()
        gp = GraphProperties()

        ir = [t.inforatio()[0] for t in tsd_list]

        # gp.color = 'Blues'
        # plot all the variations
        gp.resample = 'B'
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + '.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type
        pf.plot_line_graph(port_list, adapter = 'pythalesians', gp = gp)

        # plot all the IR in a bar chart form (can be easier to read!)
        gp = GraphProperties()
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png'
        gp.scale_factor = self.scale_factor
        gp.title = strat.FINAL_STRATEGY + ' ' + parameter_type
        summary = pandas.DataFrame(index = pretty_portfolio_names, data = ir, columns = ['IR'])
        pf.plot_bar_graph(summary, adapter = 'pythalesians', gp = gp)

        return port_list

    ###### Parameters and signal generations (need to be customised for every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self, strat, parameter_list = None,
                                  pretty_portfolio_names = None, strip = None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = strat.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = strat.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            strat.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])
            strat.br = br
            strat.construct_strategy(br = br)

            strat.plot_strategy_pnl()
            strat.plot_strategy_leverage()
            strat.plot_strategy_group_benchmark_pnl(strip = strip)

        # reset the parameters of the strategy
        strat.br = strat.fill_backtest_request()
        strat.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, strat):
        from pythalesians.economics.seasonality.seasonality import Seasonality
        from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

        tsc = TimeSeriesCalcs()
        seas = Seasonality()
        strat.construct_strategy()
        pnl = strat.get_strategy_pnl()

        # get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = tsc.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average = True)

        # get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = tsc.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        gp = GraphProperties()
        pf = PlotFactory()

        # Plotting spot over day of month/month of year
        gp.color = 'Blues'
        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality day of month.png'
        gp.title = strat.FINAL_STRATEGY + ' day of month seasonality'
        gp.display_legend = False
        gp.color_2_series = [bus_day.columns[-1]]
        gp.color_2 = ['red'] # red, pink
        gp.linewidth_2 = 4
        gp.linewidth_2_series = [bus_day.columns[-1]]
        gp.y_axis_2_series = [bus_day.columns[-1]]

        pf.plot_line_graph(bus_day, adapter = 'pythalesians', gp = gp)

        gp = GraphProperties()

        gp.scale_factor = self.scale_factor
        gp.file_output = self.DUMP_PATH + strat.FINAL_STRATEGY + ' seasonality month of year.png'
        gp.title = strat.FINAL_STRATEGY + ' month of year seasonality'

        pf.plot_line_graph(month, adapter = 'pythalesians', gp = gp)

        return month
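
A hedged sketch of driving TradeAnalysis with a strategy object such as the StrategyFXCTA_Example that appears later in this listing:

ta = TradeAnalysis()
strategy = StrategyFXCTA_Example()

strategy.construct_strategy()
ta.run_strategy_returns_stats(strategy)             # PyFolio returns tear sheet
ta.run_tc_shock(strategy, tc=[0, 0.5, 1.0, 2.0])    # P&L sensitivity to transaction costs (bp)
ta.run_day_of_month_analysis(strategy)              # day-of-month / month-of-year seasonality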
class LoaderDukasCopy(LoaderTemplate):
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        super(LoaderDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Dukascopy)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (time_series_request.freq in [
                'daily', 'weekly', 'monthly', 'quarterly', 'yearly',
                'intraday', 'minute', 'hourly'
        ]):
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (LightTimeSeriesFactory only calls one ticker at a time)
        if (time_series_request.freq in ['tick']):
            # time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.get_tick(time_series_request,
                                       time_series_request_vendor)

            if data_frame is not None: data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        return

    def get_tick(self, time_series_request, time_series_request_vendor):

        data_frame = self.download_tick(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [time_series_request_vendor.tickers[0]] * len(returned_fields)

        if data_frame is not None:
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, time_series_request):

        symbol = time_series_request.tickers[0]
        df_list = []

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [
            self.fetch_file(time, symbol)
            for time in self.hour_range(time_series_request.start_date,
                                        time_series_request.finish_date)
        ]

        # parallel (has pickle issues)
        # time_list = self.hour_range(time_series_request.start_date, time_series_request.finish_date)
        # df_list = Parallel(n_jobs=-1)(delayed(self.fetch_file)(time, symbol) for time in time_list)

        try:
            return pandas.concat(df_list)
        except:
            return None

    def fetch_file(self, time, symbol):
        if time.hour % 24 == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(symbol=symbol,
                                          year=str(time.year).rjust(4, '0'),
                                          month=str(time.month).rjust(2, '0'),
                                          day=str(time.day).rjust(2, '0'),
                                          hour=str(time.hour).rjust(2, '0'))

        tick = self.fetch_tick(Constants().dukascopy_base_url + tick_path)

        if Constants().dukascopy_write_temp_tick_disk:
            out_path = Constants().temp_pythalesians_folder + "/dkticks/" + tick_path

            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except:
            return None

    def fetch_tick(self, tick_url):
        i = 0
        tick_request = None

        # try up to 5 times to download
        while i < 5:
            try:
                tick_request = requests.get(tick_url)
                i = 5
            except:
                i = i + 1

        if (tick_request is None):
            self.logger("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        data_file = open(out_path, "wb+")
        data_file.write(content)
        data_file.close()

    def chunks(self, list, n):
        if n < 1:
            n = 1
        return [list[i:i + n] for i in range(0, len(list), n)]

    def retrieve_df(self, data, symbol, epoch):
        date, tuple = pythalesians.market.loaders.lowlevel.brokers.parserows.parse_tick_data(
            data, epoch)

        df = pandas.DataFrame(data=tuple,
                              columns=['temp', 'bid', 'ask', 'bidv', 'askv'],
                              index=date)
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without decimal point
        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        delta_t = end_date - start_date

        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)
        for n in range(int(delta_hours)):
            yield start_date + timedelta(0, 0, 0, 0, 0, n)  # Hours

    def get_daily_data(self):
        pass
class StrategyFXCTA_Example(StrategyTemplate):

    def __init__(self):
        super(StrategyFXCTA_Example, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        ##### FILL IN WITH YOUR OWN PARAMETERS FOR display, dumping, TSF etc.
        self.tsfactory = LightTimeSeriesFactory()
        self.DUMP_CSV = 'output_data/'
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.FINAL_STRATEGY = 'Thalesians FX CTA'
        self.SCALE_FACTOR = 3
        
        return

    ###### Parameters and signal generations (need to be customised for every model)
    def fill_backtest_request(self):

        ##### FILL IN WITH YOUR OWN BACKTESTING PARAMETERS
        br = BacktestRequest()

        # get all asset data
        br.start_date = "04 Jan 1989"
        br.finish_date = datetime.datetime.utcnow()
        br.spot_tc_bp = 0.5
        br.ann_factor = 252

        br.plot_start = "01 Apr 2015"
        br.calc_stats = True
        br.write_csv = False
        br.plot_interim = True
        br.include_benchmark = True

        # have vol target for each signal
        br.signal_vol_adjust = True
        br.signal_vol_target = 0.1
        br.signal_vol_max_leverage = 5
        br.signal_vol_periods = 20
        br.signal_vol_obs_in_year = 252
        br.signal_vol_rebalance_freq = 'BM'
        br.signal_vol_resample_freq = None

        # have vol target for portfolio
        br.portfolio_vol_adjust = True
        br.portfolio_vol_target = 0.1
        br.portfolio_vol_max_leverage = 5
        br.portfolio_vol_periods = 20
        br.portfolio_vol_obs_in_year = 252
        br.portfolio_vol_rebalance_freq = 'BM'
        br.portfolio_vol_resample_freq = None

        # tech params
        br.tech_params.sma_period = 200

        return br

    def fill_assets(self):
        ##### FILL IN WITH YOUR ASSET DATA

        # for FX basket
        full_bkt    = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                       'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['Thalesians FX CTA'] = full_bkt

        br = self.fill_backtest_request()

        self.logger.info("Loading asset data...")

        vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                          'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

        time_series_request = TimeSeriesRequest(
                    start_date = br.start_date,                     # start date
                    finish_date = br.finish_date,                   # finish date
                    freq = 'daily',                                 # daily data
                    data_source = 'quandl',                         # use Quandl as data source
                    tickers = full_bkt,                             # ticker (Thalesians)
                    fields = ['close'],                                 # which fields to download
                    vendor_tickers = vendor_tickers,                    # ticker (Quandl)
                    vendor_fields = ['close'],                          # which vendor (Quandl) fields to download
                    cache_algo = 'internet_load_return')                # how to return data

        asset_df = self.tsfactory.harvest_time_series(time_series_request)

        # signalling variables
        spot_df = asset_df
        spot_df2 = None

        return asset_df, spot_df, spot_df2, basket_dict

    def construct_signal(self, spot_df, spot_df2, tech_params, br):

        ##### FILL IN WITH YOUR OWN SIGNALS

        # use technical indicator to create signals
        # (we could obviously create whatever function we wanted for generating the signal dataframe)
        tech_ind = TechIndicator()
        tech_ind.create_tech_ind(spot_df, 'SMA', tech_params); signal_df = tech_ind.get_signal()

        return signal_df

    def construct_strategy_benchmark(self):

        ###### FILL IN WITH YOUR OWN BENCHMARK

        tsr_indices = TimeSeriesRequest(
            start_date = '01 Jan 1980',                     # start date
            finish_date = datetime.datetime.utcnow(),       # finish date
            freq = 'daily',                                 # daily data
            data_source = 'quandl',                         # use Quandl as data source
            tickers = ["EURUSD"],                           # tickers to download
            vendor_tickers=['FRED/DEXUSEU'],
            fields = ['close'],                             # which fields to download
            vendor_fields = ['close'],
            cache_algo = 'cache_algo_return')               # how to return data

        df = self.tsfactory.harvest_time_series(tsr_indices)

        df.columns = [x.split(".")[0] for x in df.columns]

        return df
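
A hedged sketch of running the CTA template above; construct_strategy and the plotting helpers are assumed to come from the StrategyTemplate base class, as used by TradeAnalysis earlier in this listing:

strategy = StrategyFXCTA_Example()
strategy.construct_strategy()

strategy.plot_strategy_pnl()                    # cumulative P&L of the SMA trend-following basket
strategy.plot_strategy_group_benchmark_pnl()    # individual pairs vs the combined basket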
Example #41
0
class LoaderBBG(LoaderTemplate):

    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events times/dates separately needs ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index('release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop = False)

                    data_frame = pandas.concat([events_data_frame, datetime_data_frame], axis = 1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.download_intraday(time_series_request_vendor)

            cols = data_frame.columns.values
            data_frame = data_frame.tz_localize('UTC')
            cols = time_series_request.tickers[0] + "." + cols
            data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor, time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year = end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " + time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            data_frame = data_frame.convert_objects(convert_dates = 'coerce', convert_numeric= 'coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
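
LoaderBBG leaves the actual wire calls abstract; in this listing the usual entry point is LightTimeSeriesFactory, so a hedged sketch of a Bloomberg daily request might look as follows (the 'bloomberg' data source string and the vendor ticker/field values are assumptions):

tsfactory = LightTimeSeriesFactory()

tsr = TimeSeriesRequest(start_date='01 Jan 2015', finish_date=datetime.datetime.utcnow(),
                        freq='daily', data_source='bloomberg',
                        tickers=['EURUSD'], fields=['close'],
                        vendor_tickers=['EURUSD Curncy'], vendor_fields=['PX_LAST'],
                        cache_algo='internet_load_return')

df = tsfactory.harvest_time_series(tsr)    # dispatched internally to a concrete LoaderBBG subclass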