Code example #1
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS',
                          "Y")  # include expired contracts
        self.add_override(request, 'START_DT',
                          options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        # Add user defined overrides for BBG request
        self.add_override_dict(request, options)

        logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
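
The add_override helper is not shown in this example. With the raw blpapi API, an override is typically appended to the request's "overrides" array element; a minimal sketch of what such a helper might look like (not necessarily findatapy's exact implementation):

    def add_override(self, request, field_id, value):
        # append one override (a fieldId/value pair) to the request's
        # "overrides" array element
        override = request.getElement("overrides").appendElement()
        override.setElement("fieldId", field_id)
        override.setElement("value", value)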
Code example #2
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        logger = LoggerManager().getLogger(__name__)

        data_vals = list(data.values())

        # Each price time point has multiple fields - marginally quicker
        # (avoid naming the list "tuple", which shadows the builtin)
        bars = [([
            bar.getElementAsFloat(self.OPEN),
            bar.getElementAsFloat(self.HIGH),
            bar.getElementAsFloat(self.LOW),
            bar.getElementAsFloat(self.CLOSE),
            bar.getElementAsInteger(self.VOLUME),
            bar.getElementAsInteger(self.NUM_EVENTS)
        ], bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), bars))
        time_list = list(map(itemgetter(1), bars))

        try:
            logger.info("Dates between " + str(time_list[0]) + " - " +
                        str(time_list[-1]))
        except IndexError:  # time_list is empty
            logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pd.DataFrame(
            data=data_table,
            index=time_list,
            columns=["open", "high", "low", "close", "volume", "events"])
Code example #3
    def convert_csv_data_frame(self,
                               f_name,
                               category,
                               freq,
                               cutoff=None,
                               dateparse=None):
        """Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        logger = LoggerManager().getLogger(__name__)

        logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name,
                                              freq,
                                              cutoff=cutoff,
                                              dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)
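
Hypothetical usage of the method above, assuming it lives on findatapy's IOEngine-style class (the instance name, file name and dates are illustrative only):

    import pandas as pd

    io = IOEngine()
    io.convert_csv_data_frame("fx_daily.csv", "fx", "daily",
                              cutoff=pd.Timestamp("2020-12-31"))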
Code example #4
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", options.security)
        request.set("eventType", options.event)
        request.set("interval", options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if options.startDateTime is not None and options.endDateTime is not None:
            request.set("startDateTime", options.startDateTime)
            request.set("endDateTime", options.endDateTime)

        if options.gapFillInitialBar:
            # gapFillInitialBar is a scalar in the request schema, so it
            # needs set() rather than append()
            request.set("gapFillInitialBar", True)

        # Add user defined overrides for BBG request
        self.add_override_dict(request, options)

        logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request=request, correlationId=cid)
Code example #5
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS',
                          "Y")  # include expired contracts
        self.add_override(request, 'START_DT',
                          options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        if options.overrides != {}:
            for k in options.overrides.keys():
                new_k = k

                # map to the vendor override field name if one is defined
                if k in super().convert_override_fields:
                    new_k = super().convert_override_fields[k]

                self.add_override(request, new_k, options.overrides[k])

        logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
Code example #6
    def load_assets(self, br=None):
        ##### FILL IN WITH YOUR ASSET DATA
        from findatapy.util.loggermanager import LoggerManager
        logger = LoggerManager().getLogger(__name__)

        # For FX basket
        full_bkt = [
            'EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD', 'NZDUSD',
            'USDCHF', 'USDNOK', 'USDSEK'
        ]

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['FX trend'] = full_bkt

        br = self.load_parameters(br=br)

        logger.info("Loading asset data...")

        vendor_tickers = [
            'FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL',
            'FRED/DEXCAUS', 'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS',
            'FRED/DEXSDUS'
        ]

        market_data_request = MarketDataRequest(
            start_date=br.start_date,  # start date
            finish_date=br.finish_date,  # finish date
            freq='daily',  # daily data
            data_source='quandl',  # use Quandl as data source
            tickers=full_bkt,  # ticker (Thalesians)
            fields=['close'],  # which fields to download
            vendor_tickers=vendor_tickers,  # ticker (Quandl)
            vendor_fields=['close'],  # which vendor fields to download
            cache_algo='cache_algo_return')  # how to return data

        asset_df = self.market.fetch_market(market_data_request)

        # If web connection fails read from CSV
        if asset_df is None:
            import pandas
            from datetime import datetime

            # pandas.datetime was removed in pandas 1.0; use the standard
            # library datetime instead
            asset_df = pandas.read_csv(
                "d:/fxcta.csv",
                index_col=0,
                parse_dates=['Date'],
                date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d'))

        # Signalling variables
        spot_df = asset_df
        spot_df2 = None

        # asset_df

        return asset_df, spot_df, spot_df2, basket_dict
Code example #7
File: filter.py  Project: vishalbelsare/findatapy
    def pad_time_series_columns(self, columns, data_frame):
        """Selects time series from a dataframe and if necessary creates
        empty columns

        Parameters
        ----------
        columns : list(str)
            columns to be included (any that are missing are created empty)
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        old_columns = data_frame.columns.tolist()

        common_columns = [val for val in columns if val in old_columns]
        uncommon_columns = [val for val in columns if val not in old_columns]
        uncommon_columns = [str(x) for x in uncommon_columns]

        data_frame = data_frame[common_columns]

        if len(uncommon_columns) > 0:
            logger = LoggerManager().getLogger(__name__)

            logger.info(
                "Padding missing columns...")  # " + str(uncommon_columns))

            new_data_frame = pd.DataFrame(index=data_frame.index,
                                          columns=uncommon_columns)

            data_frame = pd.concat([data_frame, new_data_frame], axis=1)

            # Force new columns to float NaNs (not objects which causes
            # problems with newer pandas versions)
            # or to NaT if they are date columns
            for u in uncommon_columns:
                is_date = False

                for c in constants.always_date_columns:
                    if c in u:
                        is_date = True

                if is_date:
                    data_frame[u] = np.datetime64('NaT')
                else:
                    data_frame[u] = np.nan

            # SLOW method below
            # for x in uncommon_columns: data_frame.loc[:,x] = np.nan

        # Get columns in same order again
        data_frame = data_frame[columns]

        return data_frame
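
The padding behaviour can be reproduced with plain pandas; a self-contained sketch of the same idea (the column names are arbitrary, and "release-dt" stands in for a date-like column):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"EURUSD.close": [1.10, 1.11]},
                      index=pd.to_datetime(["2021-01-04", "2021-01-05"]))

    wanted = ["EURUSD.close", "USDJPY.close", "USDJPY.release-dt"]
    missing = [c for c in wanted if c not in df.columns]

    for c in missing:
        # date-like columns get NaT, everything else float NaN
        df[c] = np.datetime64("NaT") if "release-dt" in c else np.nan

    df = df[wanted]  # restore the requested column order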
Code example #8
    def kill_session(self, session):
        logger = LoggerManager().getLogger(__name__)
        if session is not None:
            try:
                logger.info("Stopping session...")

                session.stop()
            finally:
                logger.info("Finally stopping session...")

            # note: this only rebinds the local name; the caller's own
            # reference to the session object is unaffected
            session = None
Code example #9
    def run_day_of_month_analysis(self, trading_model, resample_freq='B'):
        from finmarketpy.economics.seasonality import Seasonality

        logger = LoggerManager().getLogger(__name__)

        calculations = Calculations()
        seas = Seasonality()
        trading_model.construct_strategy()
        pnl = trading_model.strategy_pnl()

        # Get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = calculations.calculate_returns(pnl).tz_localize(None)

        bus_day = seas.bus_day_of_month_seasonality(
            rets, add_average=True, resample_freq=resample_freq)

        # Get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = calculations.calculate_returns(pnl).tz_localize(None)
        month = seas.monthly_seasonality(rets)

        logger.info("About to plot seasonality...")
        style = Style()

        # Plotting spot over day of month/month of year
        style.color = 'Blues'
        style.scale_factor = trading_model.SCALE_FACTOR
        style.file_output = (self.DUMP_PATH + trading_model.FINAL_STRATEGY +
                             ' seasonality day of month.png')
        style.html_file_output = (self.DUMP_PATH + trading_model.FINAL_STRATEGY +
                                  ' seasonality day of month.html')
        style.title = trading_model.FINAL_STRATEGY + ' day of month seasonality'
        style.display_legend = False
        style.color_2_series = [bus_day.columns[-1]]
        style.color_2 = ['red']  # red, pink
        style.linewidth_2 = 4
        style.linewidth_2_series = [bus_day.columns[-1]]
        style.y_axis_2_series = [bus_day.columns[-1]]

        self.chart.plot(bus_day, chart_type='line', style=style)

        style = Style()

        style.scale_factor = trading_model.SCALE_FACTOR
        style.file_output = (self.DUMP_PATH + trading_model.FINAL_STRATEGY +
                             ' seasonality month of year.png')
        style.html_file_output = (self.DUMP_PATH + trading_model.FINAL_STRATEGY +
                                  ' seasonality month of year.html')
        style.title = trading_model.FINAL_STRATEGY + ' month of year seasonality'

        self.chart.plot(month, chart_type='line', style=style)

        return month
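
For intuition, the business-day-of-month bucketing performed by bus_day_of_month_seasonality can be approximated in a few lines of pandas; a rough sketch (not the library's actual implementation), assuming rets is a business-daily returns DataFrame:

    # rank each business day within its month (1 = first business day)
    bus_day_rank = rets.groupby(
        [rets.index.year, rets.index.month]).cumcount() + 1

    # average return for each business day of the month
    day_of_month_seasonality = rets.groupby(bus_day_rank).mean()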
Code example #10
    def load_assets(self, br = None):
        ##### FILL IN WITH YOUR ASSET DATA
        from findatapy.util.loggermanager import LoggerManager
        logger = LoggerManager().getLogger(__name__)

        # for FX basket
        full_bkt = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                    'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

        basket_dict = {}

        for i in range(0, len(full_bkt)):
            basket_dict[full_bkt[i]] = [full_bkt[i]]

        basket_dict['FX trend'] = full_bkt

        br = self.load_parameters(br = br)

        logger.info("Loading asset data...")

        vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                          'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

        market_data_request = MarketDataRequest(
                    start_date = br.start_date,          # start date
                    finish_date = br.finish_date,        # finish date
                    freq = 'daily',                      # daily data
                    data_source = 'quandl',              # use Quandl as data source
                    tickers = full_bkt,                  # ticker (Thalesians)
                    fields = ['close'],                  # which fields to download
                    vendor_tickers = vendor_tickers,     # ticker (Quandl)
                    vendor_fields = ['close'],           # which vendor fields to download
                    cache_algo = 'cache_algo_return')    # how to return data

        asset_df = self.market.fetch_market(market_data_request)

        # if web connection fails read from CSV
        if asset_df is None:
            import pandas
            from datetime import datetime

            # pandas.datetime was removed in pandas 1.0; use the standard
            # library datetime instead
            asset_df = pandas.read_csv("d:/fxcta.csv", index_col=0, parse_dates=['Date'],
                                       date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d'))

        # signalling variables
        spot_df = asset_df
        spot_df2 = None

        # asset_df

        return asset_df, spot_df, spot_df2, basket_dict
Code example #11
    def get_daily_data(self, md_request, md_request_vendor):
        logger = LoggerManager().getLogger(__name__)

        data_frame = self.download_daily(md_request_vendor)

        # Convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                logger.info("No tickers returned for...")

                try:
                    logger.info(str(md_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            try:
                fields = self.translate_from_vendor_field(
                    returned_fields, md_request)
            except:
                logger.warning("Problem translating vendor field")

                # fall back to the raw vendor field names so the column
                # renaming below still works
                fields = list(returned_fields)

            tickers = self.translate_from_vendor_ticker(
                returned_tickers, md_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            # Convert numerical columns to floats and dates to dates (avoids
            # having object columns which can cause issues with later Pandas)
            data_frame = self.force_type_conversion(data_frame)

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

            # Force sorting of index
            try:
                data_frame = data_frame.sort_index()
            except:
                pass

        return data_frame
Code example #12
    def process_message(self, msg):

        constants = DataConstants()
        # Process received events

        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        # perhaps try to run in parallel?
        logger = LoggerManager().getLogger(__name__)

        ticker = msg.getElement('securityData').getElement(
            'security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        data = defaultdict(dict)

        # FASTER avoid calling getValue/getElement methods in blpapi,
        # very slow, better to cache variables
        for i in range(fieldData.numValues()):
            mini_field_data = fieldData.getValue(i)
            date = mini_field_data.getElement(0).getValue()

            for j in range(1, mini_field_data.numElements()):
                field_value = mini_field_data.getElement(j)

                data[(str(field_value.name()),
                      ticker)][date] = field_value.getValue()

        # ORIGINAL repeated calling getValue/getElement much slower
        # for i in range(fieldData.numValues()):
        #     for j in range(1, fieldData.getValue(i).numElements()):
        #         data[(str(fieldData.getValue(i).getElement(j).name()),
        #         ticker)][fieldData.getValue(i).getElement(0).getValue()] \
        #             = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pd.DataFrame(data)

        # If obsolete ticker could return no values
        if data_frame.empty:
            return None
        else:
            # data_frame.columns = pd.MultiIndex.from_tuples(data,
            # names=['field', 'ticker'])
            data_frame.index = pd.to_datetime(data_frame.index)
            logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) +
                        ' - ' + str(data_frame.index[-1]))

        return data_frame
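
The (field, ticker)-keyed defaultdict maps directly onto MultiIndex columns when handed to pandas; a standalone sketch with dummy values:

    from collections import defaultdict

    import pandas as pd

    data = defaultdict(dict)
    data[("PX_LAST", "EURUSD Curncy")]["2021-01-04"] = 1.2250
    data[("PX_OPEN", "EURUSD Curncy")]["2021-01-04"] = 1.2230

    df = pd.DataFrame(data)  # columns become a (field, ticker) MultiIndex
    df.index = pd.to_datetime(df.index)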
Code example #13
File: tradeanalysis.py  Project: xGOTHA/finmarketpy
    def _run_strategy(self, trading_model, asset_df, spot_df, spot_df2, br, contract_value_df, pretty_portfolio_name):

        logger = LoggerManager().getLogger(__name__)

        logger.info("Calculating... " + str(pretty_portfolio_name))

        signal_df = trading_model.construct_signal(spot_df, spot_df2, br.tech_params, br, run_in_parallel=False)

        backtest = Backtest()

        backtest.calculate_trading_PnL(br, asset_df, signal_df, contract_value_df, False)
        ret_stats = backtest.portfolio_pnl_ret_stats()
        stats = str(backtest.portfolio_pnl_desc()[0])

        port = backtest.portfolio_cum().resample('B').mean()
        port.columns = [str(pretty_portfolio_name) + ' ' + stats]

        return port, ret_stats
Code example #14
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", options.endDateTime.strftime('%Y%m%d'))

        # Only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
Code example #15
    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """Write a DataFrame to disk in as an R compatible HDF5 file.

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """

        logger = LoggerManager().getLogger(__name__)

        fname_r = self.get_h5_filename(fname)

        logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            # use a list so it can be concatenated with the list of date
            # part columns below (list + ndarray would fail)
            fields = data_frame32.columns.tolist()

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[[
            'Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'
        ] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()
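
Reading the file back in Python is straightforward; a sketch that reassembles the timestamp index from the decomposed date columns (assuming the frame was written as above):

    import pandas as pd

    df = pd.read_hdf(fname_r, "df_for_r")

    # rebuild the datetime index from the decomposed date/time columns
    parts = df[["Year", "Month", "Day", "Hour", "Minute", "Second"]].copy()
    parts.columns = [c.lower() for c in parts.columns]

    df.index = pd.to_datetime(parts)
    df = df.drop(columns=["Year", "Month", "Day", "Hour", "Minute",
                          "Second", "Millisecond"])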
Code example #16
    def send_bar_request(self, session, eventQueue, options, cid):
        logger = LoggerManager().getLogger(__name__)

        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # only one security/eventType per request
        request.set("security", options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if options.startDateTime and options.endDateTime:
            request.set("startDateTime", options.startDateTime)
            request.set("endDateTime", options.endDateTime)

        logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request=request, correlationId=cid)
Code example #17
    def clean_csv_file(self, f_name):
        """Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """
        logger = LoggerManager().getLogger(__name__)

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)
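
The same NUL-stripping can be done in binary mode, which avoids decoding the file twice; a minimal alternative sketch (not the library's implementation):

    def strip_nul_bytes(f_name):
        # remove embedded NUL bytes, which upset most CSV readers
        with open(f_name, "rb") as f:
            data = f.read()

        if b"\x00" in data:
            with open(f_name, "wb") as f:
                f.write(data.replace(b"\x00", b""))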
Code example #18
    def start_bloomberg_session(self):

        constants = DataConstants()
        tries = 0

        session = None

        logger = LoggerManager().getLogger(__name__)

        # Try up to 5 times to start a session
        while (tries < 5):
            try:
                # fill SessionOptions
                sessionOptions = blpapi.SessionOptions()
                sessionOptions.setServerHost(constants.bbg_server)
                sessionOptions.setServerPort(constants.bbg_server_port)

                logger.info("Starting Bloomberg session...")

                # create a Session
                session = blpapi.Session(sessionOptions)

                # start a Session; treat a failure to start as retryable
                if not session.start():
                    logger.error("Failed to start session, retrying...")

                    session = None
                    tries = tries + 1

                    continue

                logger.info("Returning session...")

                tries = 5
            except:
                tries = tries + 1

        # BBGLowLevelTemplate._session = session

        if session is None:
            logger.error("Failed to start session.")
            return

        return session
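
The retry pattern above generalizes; a small helper sketch (the names are illustrative and not part of findatapy):

    def retry(func, attempts=5):
        # call func() until it returns a non-None value, up to attempts times
        for _ in range(attempts):
            try:
                result = func()

                if result is not None:
                    return result
            except Exception:
                pass

        return None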
Code example #19
    def pad_time_series_columns(self, columns, data_frame):
        """Selects time series from a dataframe and if necessary creates empty columns

        Parameters
        ----------
        columns : list(str)
            columns to be included (any that are missing are created empty)
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        old_columns = data_frame.columns

        common_columns = [val for val in columns if val in old_columns]
        uncommon_columns = [val for val in columns if val not in old_columns]
        uncommon_columns = [str(x) for x in uncommon_columns]

        data_frame = data_frame[common_columns]

        if len(uncommon_columns) > 0:
            logger = LoggerManager().getLogger(__name__)

            logger.info(
                "Padding missing columns...")  # " + str(uncommon_columns))

            new_data_frame = pd.DataFrame(index=data_frame.index,
                                          columns=uncommon_columns)

            data_frame = pd.concat([data_frame, new_data_frame], axis=1)

            # SLOW method below
            # for x in uncommon_columns: data_frame.loc[:,x] = np.nan

        # get columns in same order again
        data_frame = data_frame[columns]

        return data_frame
Code example #20
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)
        logger = LoggerManager().getLogger(__name__)

        #  logger.info("Processing tick data for " + str(self._options.security))

        data_vals = data.values()

        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""
        #
        #     # each price time point has multiple fields - marginally quicker
        #     tuple.append(([item.getElementAsFloat(self.VALUE),
        #                     item.getElementAsInteger(self.TICK_SIZE)],
        #                     item.getElementAsDatetime(self.TIME)))

        # slightly faster this way (note, we are skipping trade & CC fields);
        # avoid naming the list "tuple", which shadows the builtin
        ticks = [([
            item.getElementAsFloat(self.VALUE),
            item.getElementAsInteger(self.TICK_SIZE)
        ], item.getElementAsDatetime(self.TIME)) for item in data_vals]

        data_table = list(map(itemgetter(0), ticks))
        time_list = list(map(itemgetter(1), ticks))

        try:
            logger.info("Dates between " + str(time_list[0]) + " - " +
                        str(time_list[-1]))
        except IndexError:  # time_list is empty
            logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pd.DataFrame(data=data_table,
                            index=time_list,
                            columns=["close", "ticksize"])
Code example #21
    def get_daily_data(self, market_data_request, market_data_request_vendor):
        logger = LoggerManager().getLogger(__name__)

        data_frame = self.download_daily(market_data_request_vendor)

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                logger.info("No tickers returned for...")

                try:
                    logger.info(str(market_data_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            try:
                fields = self.translate_from_vendor_field(
                    returned_fields, market_data_request)
            except:
                logger.warning("Problem translating vendor field")

                # fall back to the raw vendor field names so the column
                # renaming below still works
                fields = list(returned_fields)

            tickers = self.translate_from_vendor_ticker(
                returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame
Code example #22
File: market.py  Project: pkan0583/findatapy
class FXCrossFactory(object):
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        self.cache = {}

        self.calculations = Calculations()
        self.market_data_generator = market_data_generator

        return

    def flush_cache(self):
        self.cache = {}

    def get_fx_cross_tick(self,
                          start,
                          end,
                          cross,
                          cut="NYC",
                          source="dukascopy",
                          cache_algo='internet_load_return',
                          type='spot',
                          environment='backtest',
                          fields=['bid', 'ask']):

        if isinstance(cross, str):
            cross = [cross]

        market_data_request = MarketDataRequest(
            gran_freq="tick",
            freq_mult=1,
            freq='tick',
            cut=cut,
            fields=['bid', 'ask', 'bidv', 'askv'],
            cache_algo=cache_algo,
            environment=environment,
            start_date=start,
            finish_date=end,
            data_source=source,
            category='fx')

        market_data_generator = self.market_data_generator
        data_frame_agg = None

        for cr in cross:

            if (type == 'spot'):
                market_data_request.tickers = cr

                cross_vals = market_data_generator.fetch_market_data(
                    market_data_request)

                # if user only wants 'close', calculate it as the bid/ask
                # mid; mean(axis=1) returns a Series, so convert it back to
                # a DataFrame
                if fields == ['close']:
                    cross_vals = cross_vals[[cr + '.bid', cr + '.ask']].mean(
                        axis=1).to_frame(name=cr + '.close')

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg

    def get_fx_cross(self,
                     start,
                     end,
                     cross,
                     cut="NYC",
                     source="bloomberg",
                     freq="intraday",
                     cache_algo='internet_load_return',
                     type='spot',
                     environment='backtest',
                     fields=['close']):

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start,
                                          end,
                                          cross,
                                          cut=cut,
                                          source=source,
                                          cache_algo=cache_algo,
                                          type='spot',
                                          fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []
        freq_list = []
        type_list = []

        for cr in cross:
            market_data_request = MarketDataRequest(freq_mult=1,
                                                    cut=cut,
                                                    fields=['close'],
                                                    freq=freq,
                                                    cache_algo=cache_algo,
                                                    start_date=start,
                                                    finish_date=end,
                                                    data_source=source,
                                                    environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday

            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # depends on the nature of operation as to whether we should use the
        # threading or multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spend waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in \
                DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        # fudge, issue with multithreading and accessing HDF5 files
        # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #    thread_no = 0

        if (thread_no > 0):
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self._get_individual_fx_cross,
                                    market_data_request_list)
            data_frame_agg = self.calculations.iterative_outer_join(
                result.get())

            # data_frame_agg = self.calculations.pandas_outer_join(result.get())

            # pool would have already been closed earlier
            # try:
            #    pool.close()
            #    pool.join()
            # except: pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_agg)

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg

    def _get_individual_fx_cross(self, market_data_request):
        cr = market_data_request.cross
        type = market_data_request.type
        freq = market_data_request.freq

        base = cr[0:3]
        terms = cr[3:6]

        if (type == 'spot'):
            # non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # download base USD cross
                market_data_request.tickers = base_USD
                market_data_request.category = 'fx'

                if base_USD + '.close' in self.cache:
                    base_vals = self.cache[base_USD + '.close']
                else:
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[base_USD + '.close'] = base_vals

                # download terms USD cross
                market_data_request.tickers = terms_USD
                market_data_request.category = 'fx'

                if terms_USD + '.close' in self.cache:
                    terms_vals = self.cache[terms_USD + '.close']
                else:
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[terms_USD + '.close'] = terms_vals

                # if quoted USD/base flip to get USD terms
                if (base_USD[0:3] == 'USD'):
                    if 'USD' + base + '.close' in self.cache:
                        base_vals = self.cache['USD' + base + '.close']
                    else:
                        base_vals = 1 / base_vals
                        self.cache['USD' + base + '.close'] = base_vals

                # if quoted USD/terms flip to get USD terms
                if (terms_USD[0:3] == 'USD'):
                    if 'USD' + terms + '.close' in self.cache:
                        terms_vals = self.cache['USD' + terms + '.close']
                    else:
                        terms_vals = 1 / terms_vals
                        self.cache['USD' + terms + '.close'] = terms_vals

                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']

                cross_vals = base_vals.div(terms_vals, axis='index')
                cross_vals.columns = [cr + '.close']

                base_vals.columns = [base_USD + '.close']
                terms_vals.columns = [terms_USD + '.close']
            else:
                # if base == 'USD': non_USD = terms
                # if terms == 'USD': non_USD = base

                correct_cr = self.fxconv.correct_notation(cr)

                market_data_request.tickers = correct_cr
                market_data_request.category = 'fx'

                if correct_cr + '.close' in self.cache:
                    cross_vals = self.cache[correct_cr + '.close']
                else:
                    cross_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)

                    # flip if not convention
                    if (correct_cr != cr):
                        if cr + '.close' in self.cache:
                            cross_vals = self.cache[cr + '.close']
                        else:
                            cross_vals = 1 / cross_vals
                            self.cache[cr + '.close'] = cross_vals

                    self.cache[correct_cr + '.close'] = cross_vals

                # cross_vals = self.market_data_generator.harvest_time_series(market_data_request)
                # set the column labels (not the index level names)
                cross_vals.columns = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'daily':
                # download base USD cross
                market_data_request.tickers = base + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    x = 0

                # download terms USD cross
                market_data_request.tickers = terms + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    pass

                base_rets = self.calculations.calculate_returns(base_vals)
                terms_rets = self.calculations.calculate_returns(terms_vals)

                cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # first return of a time series will be NaN, given we don't
                # know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self.calculations.create_mult_index(cross_rets)
                cross_vals.columns = [cr + '-tot.close']

            elif freq == 'intraday':
                self.logger.info(
                    'Total calculated returns for intraday not implemented yet'
                )
                return None

        return cross_vals
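
The core cross-rate arithmetic above is: express both legs in "USD per unit of currency" terms (flipping quotes where the market convention is USD/XXX), then divide. A self-contained sketch for EURJPY with dummy values:

    import pandas as pd

    idx = pd.to_datetime(["2021-01-04", "2021-01-05"])
    eurusd = pd.Series([1.2250, 1.2300], index=idx)  # USD per EUR
    usdjpy = pd.Series([103.20, 102.80], index=idx)  # JPY per USD

    # put both legs in USD-per-unit terms, then divide
    usd_per_eur = eurusd
    usd_per_jpy = 1 / usdjpy

    eurjpy = usd_per_eur / usd_per_jpy  # equivalent to eurusd * usdjpy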
Code example #23
    def load_time_series(self, md_request):

        # if(BBGLowLevelTemplate._session is None):
        logger = LoggerManager().getLogger(__name__)

        session = self.start_bloomberg_session()

        # else:
        #    session = BBGLowLevelTemplate._session

        def download_data_frame(sess, eventQ, opt, ci):
            if opt.security is not None:
                self.send_bar_request(sess, eventQ, opt, ci)

                logger.info("Waiting for data to be returned...")

                return self.event_loop(sess)
            else:
                logger.warning("No ticker or field specified!")

                return None

        try:
            # if can't open the session, kill existing one
            # then try reopen (up to 5 times...)
            i = 0

            while i < 5:
                if session is not None:
                    if not session.openService("//blp/refdata"):
                        logger.info("Try reopening Bloomberg session... try " +
                                    str(i))
                        # need to forcibly kill_session since we can't
                        # always reopen it
                        self.kill_session(session)
                        session = self.start_bloomberg_session()

                        if session is not None:
                            if session.openService("//blp/refdata"): i = 6
                else:
                    logger.info("Try opening Bloomberg session... try " +
                                str(i))
                    session = self.start_bloomberg_session()

                i = i + 1

            # Give error if still doesn't work after several tries..
            if not session.openService("//blp/refdata"):
                logger.error("Failed to open //blp/refdata")

                return

            logger.info("Creating request...")

            eventQueue = blpapi.EventQueue()
            # eventQueue = None

            # Create a request
            from blpapi import CorrelationId

            options = self.fill_options(md_request)

            # In some instances we might split the options if need to have
            # different overrides
            if isinstance(options, list):
                data_frame_list = []

                for op in options:
                    cid = CorrelationId()
                    data_frame_list.append(
                        download_data_frame(session, eventQueue, op, cid))

                data_frame = Calculations().join(data_frame_list)
            else:
                cid = CorrelationId()
                data_frame = download_data_frame(session, eventQueue, options,
                                                 cid)
        finally:
            # stop the session (will fail if NoneType)
            try:
                session.stop()
            except:
                pass

        return data_frame
Code example #24
    def load_ticker(self, md_request):
        """Retrieves market data from external data source (in this case 
        Bloomberg)

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start 
            and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        constants = DataConstants()

        md_request = MarketDataRequest(md_request=md_request)
        md_request_vendor = self.construct_vendor_md_request(md_request)

        data_frame = None

        logger = LoggerManager().getLogger(__name__)
        logger.info("Request Bloomberg data")

        # Do we need daily or intraday data?
        if (md_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # Work out the fields which need to be downloaded via Bloomberg ref request (BDP) and
            # those that can be downloaded via Historical request (BDH)
            ref_fields = []
            ref_vendor_fields = []

            # Get user defined list of BBG fields/vendor fields which need to
            # be downloaded by BDP
            bbg_ref_fields = list(constants.bbg_ref_fields.keys())
            bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] in bbg_ref_fields \
                        or md_request_vendor.fields[i] in bbg_ref_vendor_fields:
                    ref_fields.append(md_request.fields[i])
                    ref_vendor_fields.append(md_request_vendor.fields[i])

            non_ref_fields = []
            non_ref_vendor_fields = []

            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] not in bbg_ref_fields \
                        and md_request_vendor.fields[i] not in bbg_ref_vendor_fields:
                    non_ref_fields.append(md_request.fields[i])
                    non_ref_vendor_fields.append(md_request_vendor.fields[i])

            # For certain cases, need to use ReferenceDataRequest
            # eg. for events times/dates, last tradeable date fields (when specified)
            if len(ref_fields) > 0:

                # Careful: make sure you copy the market data request object
                # (when threading, altering that can
                # cause concurrency issues!)
                old_fields = copy.deepcopy(md_request.fields)
                old_vendor_fields = copy.deepcopy(md_request_vendor.fields)

                # md_request = MarketDataRequest(md_request=md_request_copy)

                md_request.fields = ref_fields
                md_request.vendor_fields = ref_vendor_fields
                md_request_vendor = self.construct_vendor_md_request(
                    md_request)

                # Just select those reference fields to download via reference
                datetime_data_frame = self.get_reference_data(
                    md_request_vendor, md_request)

                # Download all the other event or non-ref fields
                # (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(non_ref_fields) > 0:

                    md_request.fields = non_ref_fields
                    md_request.vendor_fields = non_ref_vendor_fields
                    md_request_vendor = self.construct_vendor_md_request(
                        md_request)

                    events_data_frame = self.get_daily_data(
                        md_request, md_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pd.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                md_request.fields = copy.deepcopy(old_fields)
                md_request_vendor.fields = copy.deepcopy(old_vendor_fields)

            # For all other daily/monthly/quarter data, we can use
            # HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(md_request, md_request_vendor)

                # if data_frame is not None:
                #     # Convert fields with release-dt to dates (special case!) and assume everything else numerical
                #     for c in data_frame.columns:
                #         try:
                #             if 'release-dt' in c:
                #                 data_frame[c] = (data_frame[c]).astype('int').astype(str).apply(
                #                         lambda x: pd.to_datetime(x, format='%Y%m%d'))
                #             else:
                #                 data_frame[c] = pd.to_numeric(data_frame[c])
                #         except:
                #             pass

        # Assume one ticker only for intraday data and use IntradayDataRequest
        # to Bloomberg
        if (md_request.freq
                in ['tick', 'intraday', 'second', 'minute', 'hourly']):
            md_request_vendor.tickers = md_request_vendor.tickers[0]

            if md_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(md_request_vendor)
            else:
                data_frame = self.download_intraday(md_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        logger.info("No tickers returned for: " +
                                    md_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = md_request.tickers[0] + "." + cols
                data_frame.columns = cols

        logger.info("Completed request from Bloomberg.")

        return data_frame
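
The field partitioning at the top of load_ticker is a simple membership split; a standalone sketch (the field names are illustrative only):

    fields = ["close", "release-date-time-full", "open"]
    bbg_ref_fields = ["release-date-time-full"]  # fields needing a BDP-style request

    ref_fields = [f for f in fields if f in bbg_ref_fields]
    non_ref_fields = [f for f in fields if f not in bbg_ref_fields]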
Code example #25
    def process_message(self, msg):
        logger = LoggerManager().getLogger(__name__)
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0
        single = False

        for securityData in list(securityDataArray.values()):

            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        try:
                            field_val = re.findall(r'"(.*?)"', "%s" % row)[0]
                        except:
                            e = row.getElement(0)
                            # k = str(e.name())
                            field_val = e.getValue()

                        data[(field_name, ticker)][index] = field_val

                        index = index + 1
                else:
                    field_name = "%s" % field.name()
                    data[(field_name, ticker)][0] = field.getValueAsString()

                    index = index + 1
                    # only a single row, so no need to create a multi-index
                    # later (careful: this path is needed for futures
                    # expiries)
                    single = True

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")

                logger.error(errorInfo.getElementAsString("category") + ": " +
                             fieldException.getElementAsString("fieldId"))

        # Explicitly use from_dict (constructing pd.DataFrame(data) directly
        # can be buggy)
        data_frame = pd.DataFrame.from_dict(data)

        # If obsolete ticker could return no values
        if data_frame.empty:
            return None
        else:
            logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) +
                        ' - ' + str(data_frame.index[-1]))

        return data_frame
Code example #26
    # have vol target for each signal
    br.signal_vol_adjust = True
    br.signal_vol_target = 0.05
    br.signal_vol_max_leverage = 3
    br.signal_vol_periods = 60
    br.signal_vol_obs_in_year = 252
    br.signal_vol_rebalance_freq = 'BM'
    br.signal_vol_resample_freq = None

    tech_params = TechParams()
    tech_params.sma_period = 200
    indicator = 'SMA'

    # pick USD crosses in G10 FX
    # note: we are calculating returns from spot (it is much better to use
    # total return indices for FX, which include carry)
    logger.info("Loading asset data...")

    tickers = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
               'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

    vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                      'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

    md_request = MarketDataRequest(
                start_date = "01 Jan 1989",                     # start date
                finish_date = datetime.date.today(),            # finish date
                freq = 'daily',                                 # daily data
                data_source = 'quandl',                         # use Quandl as data source
                tickers = tickers,                              # ticker (findatapy)
                fields = ['close'],                                 # which fields to download
                vendor_tickers = vendor_tickers,                    # ticker (Quandl)
Code example #27
class TradeAnalysis(object):
    """Applies some basic trade analysis for a trading strategy (as defined by TradingModel). Use PyFolio to create some
    basic trading statistics. Also allows you test multiple parameters for a specific strategy (like TC).

    """

    def __init__(self, engine = ChartConstants().chartfactory_default_engine):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.SCALE_FACTOR = 3
        self.DEFAULT_PLOT_ENGINE = engine
        self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE)

        return

    def run_strategy_returns_stats(self, trading_model, index = None, engine = 'pyfolio'):
        """Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        trading_model : TradingModel
            defining trading strategy
        index: DataFrame
            define strategy by a time series

        """

        if index is None:
            pnl = trading_model.get_strategy_pnl()
        else:
            pnl = index

        tz = Timezone()
        calculations = Calculations()

        if engine == 'pyfolio':
            # PyFolio assumes UTC time based DataFrames (so force this localisation)
            try:
                pnl = tz.localise_index_as_UTC(pnl)
            except: pass

            # set the matplotlib style sheet & defaults
            # at present this only works in Matplotlib engine
            try:
                matplotlib.rcdefaults()
                plt.style.use(ChartConstants().chartfactory_style_sheet['chartpy-pyfolio'])
            except: pass

            # TODO for intraday strategies, make daily

            # convert DataFrame (assumed to have only one column) to Series
            pnl = calculations.calculate_returns(pnl)
            pnl = pnl.dropna()
            pnl = pnl[pnl.columns[0]]
            fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

            try:
                plt.savefig(trading_model.DUMP_PATH + "stats.png")
            except: pass

            plt.show()
        elif engine == 'finmarketpy':

            # assume we have TradingModel
            # to do to take in a time series
            from chartpy import Canvas, Chart
            pnl = trading_model.plot_strategy_pnl(silent_plot=True)                         # plot the final strategy
            individual = trading_model.plot_strategy_group_pnl_trades(silent_plot=True)     # plot the individual trade P&Ls

            pnl_comp = trading_model.plot_strategy_group_benchmark_pnl(silent_plot=True)    # plot all the cumulative P&Ls of each component
            ir_comp = trading_model.plot_strategy_group_benchmark_pnl_ir(silent_plot=True)  # plot all the IR of each component

            leverage = trading_model.plot_strategy_leverage(silent_plot=True)               # plot the leverage of the portfolio
            ind_lev = trading_model.plot_strategy_group_leverage(silent_plot=True)          # plot all the individual leverages

            canvas = Canvas([[pnl, individual],
                             [pnl_comp, ir_comp],
                             [leverage, ind_lev]]
                             )

            canvas.generate_canvas(silent_display=False, canvas_plotter='plain')

    def run_excel_trade_report(self, trading_model, excel_file = 'model.xlsx'):
        """
        run_excel_trade_report - Creates an Excel spreadsheet with model returns and latest trades

        Parameters
        ----------
        trading_model : TradingModel
            defining trading strategy (can be a list)

        """

        trading_model_list = trading_model

        if not(isinstance(trading_model_list, list)):
            trading_model_list = [trading_model]

        writer = pandas.ExcelWriter(excel_file, engine='xlsxwriter')

        for tm in trading_model_list:
            strategy_name = tm.FINAL_STRATEGY
            returns = tm.get_strategy_group_benchmark_pnl()

            returns.to_excel(writer, sheet_name=strategy_name + ' rets', engine='xlsxwriter')

            # write raw position/trade sizes
            self.save_positions_trades(tm, tm.get_strategy_signal(),tm.get_strategy_trade(),
                                       'pos', 'trades', writer)

            if hasattr(tm, '_strategy_signal_notional'):
                # write position/trade sizes scaled by notional
                self.save_positions_trades(tm,
                                           tm.get_strategy_signal_notional(),
                                           tm.get_strategy_trade_notional(), 'pos - Not', 'trades - Not', writer)

            if hasattr(tm, '_strategy_signal_contracts'):
                # write position/trade sizes in terms of contract sizes
                self.save_positions_trades(tm,
                                           tm.get_strategy_signal_contracts(),
                                           tm.get_strategy_trade_contracts(), 'pos - Cont', 'trades - Cont', writer)

        # TODO Add summary sheet comparing return statistics for all the different models in the list

        writer.save()
        writer.close()

    def save_positions_trades(self, tm, signals, trades, signal_caption, trade_caption, writer):
        signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' hist ' + signal_caption, engine='xlsxwriter')

        # only strip a suffix from the tickers if the model defines one
        strip = tm.STRIP if hasattr(tm, 'STRIP') else None

        recent_signals = tm.grab_signals(signals, date=[-1, -2, -5, -10, -20], strip=strip)
        recent_trades = tm.grab_signals(trades, date=[-1, -2, -5, -10, -20], strip=strip)

        recent_signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' ' + signal_caption, engine='xlsxwriter')
        recent_trades.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' ' + trade_caption, engine='xlsxwriter')

    def run_tc_shock(self, strategy, tc = None):
        if tc is None: tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp' : x } for x in tc]
        pretty_portfolio_names = [str(x) + 'bp' for x in tc]    # names of the portfolio
        parameter_type = 'TC analysis'                          # broad type of parameter name

        return self.run_arbitrary_sensitivity(strategy,
                                 parameter_list=parameter_list,
                                 pretty_portfolio_names=pretty_portfolio_names,
                                 parameter_type=parameter_type)

    ###### Parameters and signal generation (need to be customised for every model)
    def run_arbitrary_sensitivity(self, trading_model, parameter_list = None, parameter_names = None,
                                  pretty_portfolio_names = None, parameter_type = None):

        asset_df, spot_df, spot_df2, basket_dict = trading_model.load_assets()

        port_list = None
        ret_stats_list = []

        for i in range(0, len(parameter_list)):
            br = trading_model.load_parameters()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            trading_model.br = br   # for calculating signals

            signal_df = trading_model.construct_signal(spot_df, spot_df2, br.tech_params, br)

            backtest = Backtest()
            self.logger.info("Calculating... " + str(pretty_portfolio_names[i]))

            backtest.calculate_trading_PnL(br, asset_df, signal_df)
            ret_stats_list.append(backtest.get_portfolio_pnl_ret_stats())
            stats = str(backtest.get_portfolio_pnl_desc()[0])

            port = backtest.get_cumportfolio().resample('B').mean()
            port.columns = [str(pretty_portfolio_names[i]) + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # reset the parameters of the strategy
        trading_model.br = trading_model.load_parameters()

        style = Style()

        ir = [t.inforatio()[0] for t in ret_stats_list]

        # if we have too many combinations remove legend and use scaled shaded colour
        # if len(port_list) > 10:
            # style.color = 'Blues'
            # style.display_legend = False

        # plot all the variations
        style.resample = 'B'
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.html'
        style.scale_factor = self.SCALE_FACTOR
        style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type

        self.chart.plot(port_list, chart_type='line', style=style)

        # plot all the IR in a bar chart form (can be easier to read!)
        style = Style()
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.html'
        style.scale_factor = self.SCALE_FACTOR
        style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type
        summary = pandas.DataFrame(index = pretty_portfolio_names, data = ir, columns = ['IR'])

        self.chart.plot(summary, chart_type='bar', style=style)

        return port_list

    ###### Parameters and signal generation (need to be customised for every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self, trading_model, parameter_list = None,
                                             pretty_portfolio_names = None, strip = None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = trading_model.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = trading_model.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            trading_model.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])
            trading_model.br = br
            trading_model.construct_strategy(br = br)

            trading_model.plot_strategy_pnl()
            trading_model.plot_strategy_leverage()
            trading_model.plot_strategy_group_benchmark_pnl(strip = strip)

        # reset the parameters of the strategy
        trading_model.br = trading_model.fill_backtest_request()
        trading_model.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, trading_model):
        from finmarketpy.economics.seasonality import Seasonality

        calculations = Calculations()
        seas = Seasonality()
        trading_model.construct_strategy()
        pnl = trading_model.get_strategy_pnl()

        # get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = calculations.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average = True)

        # get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = calculations.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        style = Style()

        # Plotting spot over day of month/month of year
        style.color = 'Blues'
        style.scale_factor = self.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.html'
        style.title = trading_model.FINAL_STRATEGY + ' day of month seasonality'
        style.display_legend = False
        style.color_2_series = [bus_day.columns[-1]]
        style.color_2 = ['red'] # red, pink
        style.linewidth_2 = 4
        style.linewidth_2_series = [bus_day.columns[-1]]
        style.y_axis_2_series = [bus_day.columns[-1]]

        self.chart.plot(bus_day, chart_type='line', style=style)

        style = Style()

        style.scale_factor = self.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.html'
        style.title = trading_model.FINAL_STRATEGY + ' month of year seasonality'

        self.chart.plot(month, chart_type='line', style=style)

        return month
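
A minimal usage sketch for the class above, assuming finmarketpy's import path; MyTradingModel is a hypothetical TradingModel subclass (any concrete strategy would do), so treat it as a placeholder rather than part of the library API.

from finmarketpy.backtest import TradeAnalysis

model = MyTradingModel()    # hypothetical TradingModel subclass
model.construct_strategy()

ta = TradeAnalysis()

# PyFolio tear sheet for the final strategy P&L
ta.run_strategy_returns_stats(model, engine='pyfolio')

# Excel report with returns and the latest positions/trades
ta.run_excel_trade_report(model, excel_file='model.xlsx')

# shock transaction costs between 0bp and 2bp
ta.run_tc_shock(model, tc=[0, 0.5, 1.0, 1.5, 2.0])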
Code example #28
class BBGLowLevelTick(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelTick, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.TICK_DATA = blpapi.Name("tickData")
        self.COND_CODE = blpapi.Name("conditionCodes")
        self.TICK_SIZE = blpapi.Name("size")
        self.TIME = blpapi.Name("time")
        self.TYPE = blpapi.Name("type")
        self.VALUE = blpapi.Name("value")
        self.RESPONSE_ERROR = blpapi.Name("responseError")
        self.CATEGORY = blpapi.Name("category")
        self.MESSAGE = blpapi.Name("message")
        self.SESSION_TERMINATED = blpapi.Name("SessionTerminated")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers[0]    # get 1st ticker only!
        self._options.event = market_data_request.trade_side.upper()
        # self._options.barInterval = market_data_request.freq_mult
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        # self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.TICK_DATA).getElement(self.TICK_DATA)

        self.logger.info("Processing tick data for " + str(self._options.security))
        data_vals = data.values()

        # for item in list(data_vals):
        #     if item.hasElement(self.COND_CODE):
        #         cc = item.getElementAsString(self.COND_CODE)
        #     else:
        #         cc = ""
        #
        #     # each price time point has multiple fields - marginally quicker
        #     tick_tuples.append(([item.getElementAsFloat(self.VALUE),
        #                          item.getElementAsInteger(self.TICK_SIZE)],
        #                          item.getElementAsDatetime(self.TIME)))

        # slightly faster this way (note, we are skipping the trade & condition code fields)
        tick_tuples = [([item.getElementAsFloat(self.VALUE),
                         item.getElementAsInteger(self.TICK_SIZE)],
                        item.getElementAsDatetime(self.TIME)) for item in data_vals]

        data_table = list(map(itemgetter(0), tick_tuples))
        time_list = list(map(itemgetter(1), tick_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data = data_table, index = time_list,
                      columns=['close', 'ticksize'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayTickRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.getElement("eventTypes").appendValue("TRADE")
        # request.set("eventTypes", self._options.event)
        request.set("includeConditionCodes", True)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        self.logger.info("Sending Tick Bloomberg Request...")

        session.sendRequest(request)
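
One detail worth calling out from fill_options above: the request window is truncated to whole seconds, and the hasattr guard lets plain dates (which have no microsecond attribute) pass through untouched. A standard-library-only sketch with a made-up timestamp:

import datetime

start = datetime.datetime(2017, 3, 1, 9, 30, 15, 123456)    # made-up timestamp

# datetime objects carry a microsecond field, so this truncates to the whole second
if hasattr(start, 'microsecond'):
    start = start.replace(microsecond=0)

print(start)    # 2017-03-01 09:30:15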
Code example #29
class BBGLowLevelIntraday(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame, data_frame_slice):
        return data_frame.append(data_frame_slice)

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers[0]    # get 1st ticker only!
        self._options.event = market_data_request.trade_side.upper()
        self._options.barInterval = market_data_request.freq_mult
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        self._options.gapFillInitialBar = False

        if hasattr(self._options.startDateTime, 'microsecond'):
            self._options.startDateTime = self._options.startDateTime.replace(microsecond=0)

        if hasattr(self._options.endDateTime, 'microsecond'):
            self._options.endDateTime = self._options.endDateTime.replace(microsecond=0)

        return self._options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        self.logger.info("Processing intraday data for " + str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pandas.DataFrame(data = data_matrix, index = date_index,
        #                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## for loop method is a touch slower
        # time_list = []
        # data_table = []

        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                  bar.getElementAsFloat(self.HIGH),
        #                  bar.getElementAsFloat(self.LOW),
        #                  bar.getElementAsFloat(self.CLOSE),
        #                  bar.getElementAsInteger(self.VOLUME),
        #                  bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # each price time point has multiple fields - marginally quicker
        bar_tuples = [([bar.getElementAsFloat(self.OPEN),
                        bar.getElementAsFloat(self.HIGH),
                        bar.getElementAsFloat(self.LOW),
                        bar.getElementAsFloat(self.CLOSE),
                        bar.getElementAsInteger(self.VOLUME),
                        bar.getElementAsInteger(self.NUM_EVENTS)],
                       bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_tuples))
        time_list = list(map(itemgetter(1), bar_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " + str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(data = data_table, index = time_list,
                      columns=['open', 'high', 'low', 'close', 'volume', 'events'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", self._options.security)
        request.set("eventType", self._options.event)
        request.set("interval", self._options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if self._options.startDateTime and self._options.endDateTime:
            request.set("startDateTime", self._options.startDateTime)
            request.set("endDateTime", self._options.endDateTime)

        if self._options.gapFillInitialBar:
            request.set("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request)
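
The list-comprehension-plus-itemgetter pattern in process_message above is essentially an unzip: build the (row, timestamp) pairs once, then split them into the parallel lists the DataFrame constructor wants. A self-contained sketch with dummy bars (no blpapi session needed):

import datetime
from operator import itemgetter

import pandas

# dummy (OHLCV-style row, timestamp) pairs standing in for Bloomberg bars
pairs = [([1.10, 1.12, 1.09, 1.11, 500, 42],
          datetime.datetime(2017, 3, 1, 9, 0)),
         ([1.11, 1.13, 1.10, 1.12, 650, 55],
          datetime.datetime(2017, 3, 1, 9, 1))]

data_table = list(map(itemgetter(0), pairs))
time_list = list(map(itemgetter(1), pairs))

print(pandas.DataFrame(data=data_table, index=time_list,
                       columns=['open', 'high', 'low', 'close', 'volume', 'events']))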
Code example #30
class BBGLowLevelRef(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        self._options.fields = market_data_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
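                        # extract the quoted value from the element's string representation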
                        data[(field_name, ticker)][index] = re.findall(r'"(.*?)"', "%s" % row)[0]

                        index = index + 1
                # else:
                    # vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                    # print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")
                self.logger.warning(errorInfo.getElementAsString("category") + ": " +
                                    fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if (not(data_frame.empty)):
            data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            self.logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)    # force GMT time
        self.add_override(request, 'START_DT', self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT', self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request)
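
combine_slices above only joins a slice whose ticker (level 1 of the MultiIndex columns) has not yet been seen. A pandas-only sketch of that check, with made-up field/ticker pairs:

import pandas

df1 = pandas.DataFrame({('release-date', 'NFP Index'): ['2017-03-10']})
df1.columns = pandas.MultiIndex.from_tuples(df1.columns, names=['field', 'ticker'])

df2 = pandas.DataFrame({('release-date', 'CPI Index'): ['2017-03-15']})
df2.columns = pandas.MultiIndex.from_tuples(df2.columns, names=['field', 'ticker'])

# join only if the slice's ticker is new (mirrors combine_slices above)
if df2.columns.get_level_values(1).values[0] not in df1.columns.get_level_values(1).values:
    combined = df1.join(df2, how="outer")

print(combined.columns.tolist())    # [('release-date', 'NFP Index'), ('release-date', 'CPI Index')]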
Code example #31
class BBGLowLevelDaily(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # populate options for Bloomberg request for asset daily request
    def fill_options(self, market_data_request):
        self._options = OptionsBBG()

        self._options.security = market_data_request.tickers
        self._options.startDateTime = market_data_request.start_date
        self._options.endDateTime = market_data_request.finish_date
        self._options.fields = market_data_request.fields

        return self._options

    def process_message(self, msg):
        # Process received events
        ticker = msg.getElement('securityData').getElement('security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        data = defaultdict(dict)

        for i in range(fieldData.numValues()):
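            # element 0 of each row is the date, so the fields themselves start at element 1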
            for j in range(1, fieldData.getValue(i).numElements()):
                data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
                    = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if (not(data_frame.empty)):
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", self._options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", self._options.endDateTime.strftime('%Y%m%d'))

        # # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request)
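
The defaultdict(dict) in process_message above keys the outer dict on (field, ticker) and the inner dict on date, and pandas.DataFrame turns that straight into a date-indexed frame with one column per (field, ticker) pair. A sketch with made-up values:

from collections import defaultdict

import pandas

data = defaultdict(dict)

# data[(field, ticker)][date] = value, as in process_message above
data[('PX_LAST', 'EURUSD Curncy')]['2017-03-01'] = 1.0545
data[('PX_LAST', 'EURUSD Curncy')]['2017-03-02'] = 1.0501
data[('PX_OPEN', 'EURUSD Curncy')]['2017-03-01'] = 1.0580

data_frame = pandas.DataFrame(data)
data_frame.index = pandas.to_datetime(data_frame.index)
print(data_frame)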
Code example #32
class IOEngine(object):
    """Write and reads time series data to disk in various formats, CSV, HDF5 (fixed and table formats) and MongoDB/Arctic.

    Can be used to save down output of finmarketpy backtests and also to cache market data locally.

    Also supports BColz (but not currently stable). Planning to add other interfaces such as SQL etc.

    """

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """Writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if (create_new):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                # appending to an existing workbook requires the openpyxl engine
                # (xlsxwriter can only create new files)
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """Writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None,
                              postfix='.close', intraday_tz='UTC'):
        """Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        excel_sheet : str
            Excel sheet to be read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str
            timezone of the file if it uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse,
                                        postfix=postfix, intraday_tz=intraday_tz, excel_sheet=excel_sheet)

    def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1', db_port='6379',
                                         timeout=10, username=None,
                                         password=None):

        if 'hdf5' in engine:
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            pass
        elif (engine == 'redis'):
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if (fname == 'flush_all_keys'):
                    r.flushall()
                else:
                    # allow deletion of keys by pattern matching
                    if "*" in fname:
                        x = r.keys(fname)

                        if len(x) > 0:
                            # redis-py expects the keys to be unpacked as separate arguments
                            r.delete(*x)
                    else:
                        r.delete(fname)

            except Exception as e:
                self.logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            store.delete_library(fname)

            c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame,
                                        engine='hdf5_fixed', append_data=False, db_server=DataConstants().db_server,
                                        db_port=DataConstants().db_port, username=None, password=None,
                                        filter_out_matching=None, timeout=10):
        """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic

        Parmeters
        ---------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this
            'hdf5_table' - use HDF5 table format, slower but can append to
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for arctic (default: '127.0.0.1')
        timeout : int
            Number of seconds to do timeout
        """

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif (engine == 'redis'):
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if isinstance(data_frame, pandas.DataFrame):
                    r.set(fname, data_frame.to_msgpack(compress='blosc'))

                self.logger.info("Pushed " + fname + " to Redis")
            except Exception as e:
                self.logger.warning("Couldn't push " + fname + " to Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                self.logger.info("Created MongoDB library: " + fname)
            else:
                self.logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            self.logger.info("Written MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is much slower than fixed format)
            # removes duplicated entries at the end
            if append_data:
                store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="blosc", complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                # get the last row which matches and remove everything after that (because the append
                # function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while (i > 0):
                    read_index = store.select('data', start=i, stop=nrows).index[0]

                    if (read_index <= last_point): break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data', value=data_frame, format=hdf5_format, append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp, format=hdf5_format, complib="blosc", complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

            self.logger.info("Written HDF5: " + fname)

        elif (engine == 'parquet'):
            if fname[-5:] != '.gzip':
                fname = fname + '.gzip'

            data_frame.to_parquet(fname, compression='gzip')

            self.logger.info("Written Parquet: " + fname)

    def get_h5_filename(self, fname):
        """Strips h5 off filename returning first portion of filename

        Parameters
        ----------
        fname : str
            h5 filename to strip

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """Strips bcolz off filename returning first portion of filename

        Parameters
        ----------
        fname : str
            bcolz filename to strip

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """Write a DataFrame to disk in as an R compatible HDF5 file.

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            # convert to a plain list so it can be concatenated with the date/time column names below
            fields = list(data_frame32.columns.values)

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None,
                                         db_server=DataConstants().db_server,
                                         db_port=DataConstants().db_port, username=None, password=None):
        """Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hd5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' = reads from bcolz file (not fully implemented)
        start_date : str/datetime (optional)
            Start date
        finish_date : str/datetime (optional)
            Finish data
        db_server : str
            IP address of MongdDB (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager().getLogger(__name__)

        data_frame_list = []

        if not(isinstance(fname, list)):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            # guard against fname_single matching none of the engines/files below
            data_frame = None

            if (engine == 'bcolz'):
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif (engine == 'redis'):
                import redis

                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                    msg = r.get(fname_single)

                except:
                    self.logger.info("Cache not existent for " + fname_single + " in Redis")

                if msg is None:
                    data_frame = None
                else:

                    self.logger.info('Load Redis cache: ' + fname_single)

                    data_frame = pandas.read_msgpack(msg)

            elif (engine == 'arctic'):
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                self.logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                        connect=False)  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

                store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)
                    else:
                        from arctic.date import DateRange
                        item = library.read(fname_single, date_range=DateRange(start_date, finish_date))

                    c.close()

                    self.logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    self.logger.warning('Library does not exist: ' + fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if ('intraday' in fname_single):
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None,
                            postfix='.close', intraday_tz='UTC', excel_sheet=None):
        """Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each columns
        intraday_tz : str (optional)
            timezone of file if uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if (freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use the C library for parsing dates, several hundred times quicker
                # (requires compilation of the library to install)
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True, date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # append the postfix (default '.close') to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion
                data_frame = data_frame.convert_objects(convert_dates='coerce')

            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"], date_parser=dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["Date"], date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

        # note: slower alternatives for converting the index to datetime include
        # data_frame.index = pandas.to_datetime(data_frame.index)
        # or applying pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S') element-wise

        if (freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):

        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        """Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename

    # TODO refactor IOEngine so that each database is implemented in a subclass of DBEngine

    def get_engine(self, engine='hdf5_fixed'):
        pass
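
A minimal round trip through the cache methods above, using the HDF5 fixed format; the DataFrame contents are made up and the file path is purely illustrative:

import pandas

io = IOEngine()

data_frame = pandas.DataFrame({'EURUSD.close': [1.05, 1.06]},
                              index=pandas.to_datetime(['2017-03-01', '2017-03-02']))

# writes output_data.h5 (the .h5 extension is appended automatically)
io.write_time_series_cache_to_disk('output_data', data_frame, engine='hdf5_fixed')

data_frame_cached = io.read_time_series_cache_from_disk('output_data', engine='hdf5')
print(data_frame_cached)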
Code example #33
class DataVendorBBG(DataVendor):

    def __init__(self):
        super(DataVendorBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (market_data_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # event times/dates need a separate ReferenceDataRequest (when specified)
            if 'release-date-time-full' in market_data_request.fields:

                # experimental!!
                # careful: make sure you copy the market data request object (when threading, altering that can
                # cause concurrency issues!)
                datetime_data_frame = self.get_reference_data(market_data_request_vendor, market_data_request)

                old_fields = copy.deepcopy(market_data_request.fields)
                old_vendor_fields = copy.deepcopy(market_data_request_vendor.fields)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                # if they are there
                try:
                    index = market_data_request.fields.index('release-date-time-full')

                    market_data_request.fields.pop(index)
                    market_data_request_vendor.fields.pop(index)
                except:
                    pass

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(market_data_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(market_data_request, market_data_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop = False)

                    data_frame = pandas.concat([events_data_frame, datetime_data_frame], axis = 1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                market_data_request.fields = old_fields
                market_data_request_vendor.fields = old_vendor_fields

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(market_data_request, market_data_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (market_data_request.freq in ['tick', 'intraday', 'second', 'minute', 'hourly']):
            market_data_request_vendor.tickers = market_data_request_vendor.tickers[0]

            if market_data_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(market_data_request_vendor)
            else:
                data_frame = self.download_intraday(market_data_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        self.logger.info("No tickers returned for: " + market_data_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = market_data_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, market_data_request, market_data_request_vendor):
        data_frame = self.download_daily(market_data_request_vendor)

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(market_data_request_vendor.tickers))
                except: pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, market_data_request)

            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, market_data_request_vendor, market_data_request):
        from datetime import datetime, timedelta

        # push the finish date a year into the future to catch forthcoming release dates
        end = datetime.utcnow() + timedelta(days=365)

        market_data_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " + market_data_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(market_data_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # TODO convert_objects is deprecated in newer versions of pandas (replace with to_numeric/to_datetime)
            data_frame = data_frame.convert_objects(convert_dates = 'coerce', convert_numeric= 'coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_tick(self, market_data_request):
        return

    @abc.abstractmethod
    def download_intraday(self, market_data_request):
        return

    @abc.abstractmethod
    def download_daily(self, market_data_request):
        return

    @abc.abstractmethod
    def download_ref(self, market_data_request):
        return
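
The try/except around tz_localize in load_ticker above covers both cases: tz_localize raises a TypeError when the index is already timezone-aware, and tz_convert is then the appropriate call. A standalone pandas sketch of the same fallback:

import pandas
import pytz

data_frame = pandas.DataFrame({'close': [1.05, 1.06]},
                              index=pandas.to_datetime(['2017-03-01 09:00', '2017-03-01 09:01']))

# naive index -> tz_localize succeeds; tz-aware index -> TypeError -> fall back to tz_convert
try:
    data_frame = data_frame.tz_localize(pytz.utc)
except TypeError:
    data_frame = data_frame.tz_convert(pytz.utc)

print(data_frame.index.tz)    # UTC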