Example #1
0
    def construct_vendor_md_request(self, md_request):
        """Builds a copy of a MarketDataRequest using the vendor's own
        ticker and field names.

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start and
            finish, tickers etc

        Returns
        -------
        MarketDataRequest
        """

        # Clone the original request, then swap in the vendor naming
        md_request_vendor = MarketDataRequest(md_request=md_request)

        md_request_vendor.tickers = self.translate_to_vendor_ticker(md_request)
        md_request_vendor.fields = self.translate_to_vendor_field(md_request)

        # Keep the original tickers so results can be translated back later
        md_request_vendor.old_tickers = md_request.tickers

        return md_request_vendor
Example #2
0
    def get_fx_cross_tick(self,
                          start,
                          end,
                          cross,
                          cut="NYC",
                          data_source="dukascopy",
                          cache_algo='internet_load_return',
                          type='spot',
                          environment='backtest',
                          fields=['bid', 'ask']):
        """Fetches tick data for one or more FX crosses and outer-joins the
        results into a single DataFrame.

        Parameters
        ----------
        start : DateTime
            start date of request
        end : DateTime
            end date of request
        cross : str or list(str)
            FX cross(es) to download
        cut : str
            closing time of the market data
        data_source : str
            data source eg. dukascopy
        cache_algo : str
            caching scheme for the data
        type : str
            only 'spot' is supported; other values are skipped
        environment : str
            data environment eg. backtest
        fields : list(str)
            fields to return eg. ['bid', 'ask'], or ['close'] to get the
            bid/ask mid

        Returns
        -------
        pandas.DataFrame or None
            None if nothing was downloaded (eg. non-spot type)
        """

        if isinstance(cross, str):
            cross = [cross]

        # Always download bid/ask (and volumes); filter to the requested
        # fields afterwards
        market_data_request = MarketDataRequest(
            gran_freq="tick",
            freq_mult=1,
            freq='tick',
            cut=cut,
            fields=['bid', 'ask', 'bidv', 'askv'],
            cache_algo=cache_algo,
            environment=environment,
            start_date=start,
            finish_date=end,
            data_source=data_source,
            category='fx')

        market_data_generator = self.market_data_generator
        data_frame_agg = None

        for cr in cross:

            # Only spot tick data is supported; previously a non-spot type
            # raised NameError because cross_vals was never assigned
            if type != 'spot':
                continue

            market_data_request.tickers = cr

            cross_vals = market_data_generator.fetch_market_data(
                market_data_request)

            # If user only wants 'close' calculate that from the bid/ask
            # fields
            if fields == ['close']:
                # to_frame keeps a DataFrame with the correct column name;
                # previously assigning .columns on the Series returned by
                # mean() had no effect
                cross_vals = cross_vals[
                    [cr + '.bid', cr + '.ask']].mean(axis=1).to_frame(
                    name=cr + '.close')
            else:
                # Renamed from 'filter' to avoid shadowing the builtin
                column_filter = Filter()

                filter_columns = [cr + '.' + f for f in fields]
                cross_vals = column_filter.filter_time_series_by_columns(
                    filter_columns, cross_vals)

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # Strip the nan elements (guard: nothing may have been downloaded)
        if data_frame_agg is not None:
            data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg
Example #3
0
    def get_base_depos(self,
                       start,
                       end,
                       currencies,
                       tenor,
                       cut="NYC",
                       data_source="bloomberg",
                       cache_algo="internet_load_return"):
        """Downloads deposit rates for the given currencies and tenors.

        Parameters
        ----------
        start : DateTime
            Start date
        end : DateTime
            End date
        currencies : str or list(str)
            Currencies for which we want to download deposit rates
        tenor : str or list(str)
            Tenor of deposit rate
        cut : str
            Closing time of the market data
        data_source : str
            data_source of the market data eg. bloomberg
        cache_algo : str
            Caching scheme for the data

        Returns
        -------
        pandas.DataFrame
            Contains deposit rates
        """

        if isinstance(currencies, str):
            currencies = [currencies]

        if isinstance(tenor, str):
            tenor = [tenor]

        # Ticker is simply currency + tenor eg. USD1M
        tickers = [cr + tn for cr in currencies for tn in tenor]

        market_data_request = MarketDataRequest(start_date=start,
                                                finish_date=end,
                                                data_source=data_source,
                                                category='base-depos',
                                                freq='daily',
                                                cut=cut,
                                                tickers=tickers,
                                                fields=['close'],
                                                cache_algo=cache_algo,
                                                environment='backtest')

        data_frame = self.market_data_generator.fetch_market_data(
            market_data_request)
        data_frame.index.name = 'Date'

        return data_frame
Example #4
0
    def get_fx_forward_points(self,
                              start,
                              end,
                              cross,
                              tenor,
                              cut="BGN",
                              source="bloomberg",
                              cache_algo="internet_load_return"):
        """Gets forward points for the specified cross(es) and tenor(s).

        :param start: start date
        :param end: end date
        :param cross: FX cross(es) for which to download forward points
        :param tenor: tenor(s) of forward points
        :param cut: closing time of data
        :param source: source of data eg. bloomberg
        :param cache_algo: caching scheme for the data

        :return: forward points
        """

        # NOTE: the previous version built a throwaway MarketDataRequest here
        # and set attributes on it, which was immediately discarded when the
        # name was rebound below - that dead code has been removed
        market_data_generator = self.market_data_generator

        if isinstance(cross, str): cross = [cross]
        if isinstance(tenor, str): tenor = [tenor]

        # Bloomberg quotes 12M rather than 1Y for forwards
        tenor = [x.replace('1Y', '12M') for x in tenor]

        tickers = []

        for cr in cross:
            for tn in tenor:
                tickers.append(cr + tn)

        market_data_request = MarketDataRequest(start_date=start,
                                                finish_date=end,
                                                data_source=source,
                                                category='fx-forwards',
                                                freq='daily',
                                                cut=cut,
                                                tickers=tickers,
                                                fields=['close'],
                                                cache_algo=cache_algo,
                                                environment='backtest')

        data_frame = market_data_generator.fetch_market_data(
            market_data_request)

        # Map 12M back to 1Y in the output columns for consistency
        data_frame.columns = [
            x.replace('12M', '1Y') for x in data_frame.columns
        ]
        data_frame.index.name = 'Date'

        return data_frame
Example #5
0
    def get_fx_implied_vol(self,
                           start,
                           end,
                           cross,
                           tenor,
                           cut="BGN",
                           data_source="bloomberg",
                           part="V",
                           cache_algo="internet_load_return"):
        """Downloads implied vol for the specified cross, tenor and part of
        the vol surface.

        Parameters
        ----------
        start : Datetime
            start date of request
        end : Datetime
            end date of request
        cross : str
            FX cross
        tenor : str
            tenor of implied vol
        cut : str
            closing time of data
        data_source : str
            data_source of market data eg. bloomberg
        part : str
            part of vol surface eg. V for ATM implied vol, 25R 25 delta risk reversal

        Return
        ------
        pandas.DataFrame

        """

        # Vol tickers are of the form cross + part + tenor
        tickers = self.get_labels(cross, part, tenor)

        md_request = MarketDataRequest(start_date=start,
                                       finish_date=end,
                                       data_source=data_source,
                                       category='fx-implied-vol',
                                       freq='daily',
                                       cut=cut,
                                       tickers=tickers,
                                       fields=['close'],
                                       cache_algo=cache_algo,
                                       environment='backtest')

        data_frame = self.market_data_generator.fetch_market_data(md_request)
        data_frame.index.name = 'Date'

        return data_frame
Example #6
0
    def get_fx_implied_vol(self,
                           start,
                           end,
                           cross,
                           tenor,
                           cut="BGN",
                           source="bloomberg",
                           part="V",
                           cache_algo="internet_load_return"):
        """Downloads implied vol for the specified cross, tenor and part of
        the vol surface.

        :param start: start date
        :param end: end date
        :param cross: asset to be calculated
        :param tenor: tenor to calculate
        :param cut: closing time of data
        :param source: source of data eg. bloomberg
        :param part: part of vol surface eg. V for ATM implied vol, 25R 25 delta risk reversal

        :return: realised volatility
        """

        if isinstance(cross, str): cross = [cross]
        if isinstance(tenor, str): tenor = [tenor]
        if isinstance(part, str): part = [part]

        # Ticker is cross + part + tenor, iterating cross -> tenor -> part
        tickers = [cr + pt + tn
                   for cr in cross
                   for tn in tenor
                   for pt in part]

        md_request = MarketDataRequest(start_date=start,
                                       finish_date=end,
                                       data_source=source,
                                       category='fx-implied-vol',
                                       freq='daily',
                                       cut=cut,
                                       tickers=tickers,
                                       fields=['close'],
                                       cache_algo=cache_algo,
                                       environment='backtest')

        data_frame = self.market_data_generator.fetch_market_data(md_request)
        data_frame.index.name = 'Date'

        return data_frame
Example #7
0
    def get_base_depos(self,
                       start,
                       end,
                       currencies,
                       tenor,
                       cut="NYC",
                       source="bloomberg",
                       cache_algo="internet_load_return"):
        """Gets deposit rates for the specified currencies and tenors.

        :param start: start date
        :param end: end date
        :param currencies: currencies for which to download deposit rates
        :param tenor: tenor(s) of the deposit rate
        :param cut: closing time of data
        :param source: source of data eg. bloomberg
        :param cache_algo: caching scheme for the data

        :return: deposit rates
        """

        market_data_generator = self.market_data_generator

        if isinstance(currencies, str): currencies = [currencies]
        if isinstance(tenor, str): tenor = [tenor]

        tickers = []

        # Ticker is simply currency + tenor eg. USD1M
        for cr in currencies:
            for tn in tenor:
                tickers.append(cr + tn)

        market_data_request = MarketDataRequest(start_date=start,
                                                finish_date=end,
                                                data_source=source,
                                                category='base-depos',
                                                freq='daily',
                                                cut=cut,
                                                tickers=tickers,
                                                fields=['close'],
                                                cache_algo=cache_algo,
                                                environment='backtest')

        data_frame = market_data_generator.fetch_market_data(
            market_data_request)
        data_frame.index.name = 'Date'

        return data_frame
Example #8
0
    def create_time_series_hash_key(self, market_data_request, ticker = None):
        """Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched, including ticker, start & finish date etc.
        ticker : str or list(str), optional
            ticker to include in the key; if a list, only the first is used

        Returns
        -------
        str
        """

        # Only the first ticker of a list contributes to the key
        if isinstance(ticker, list):
            ticker = ticker[0]

        category_key = MarketDataRequest().create_category_key(
            market_data_request, ticker)

        return self.create_cache_file_name(category_key)
Example #9
0
    def get_base_depos(self,
                       start,
                       end,
                       currencies,
                       tenor,
                       cut="NYC",
                       data_source="bloomberg",
                       cache_algo="internet_load_return"):
        """Gets the deposit rates for a particular tenor and part of surface

        Parameters
        ----------
        start : DateTime
            Start date
        end : DateTime
            End date
        currencies : str or list(str)
            Currencies for which we want to download deposit rates
        tenor : str or list(str)
            Tenor of deposit rate (None uses the default tenors in constants)
        cut : str
            Closing time of the market data
        data_source : str
            data_source of the market data eg. bloomberg
        cache_algo : str
            Caching scheme for the data

        Returns
        -------
        pd.DataFrame
            Contains deposit rates
        """

        if tenor is None:
            tenor = constants.base_depos_tenor

        if isinstance(currencies, str):
            currencies = [currencies]

        if isinstance(tenor, str):
            tenor = [tenor]

        # Ticker is simply currency + tenor eg. USD1M
        tickers = [cr + tn for cr in currencies for tn in tenor]

        # Special case: always include the Fed Funds Effective Rate
        if 'USDFedEffectiveRate' not in tickers:
            tickers.append("USDFedEffectiveRate")

        # For depos there usually isn't a 10AM NYC cut available,
        # so just use TOK data
        if cut == '10AM':
            cut = 'TOK'

        market_data_request = MarketDataRequest(start_date=start,
                                                finish_date=end,
                                                data_source=data_source,
                                                category='base-depos',
                                                freq='daily',
                                                cut=cut,
                                                tickers=tickers,
                                                fields=['close'],
                                                cache_algo=cache_algo,
                                                environment='backtest')

        data_frame = self._market_data_generator.fetch_market_data(
            market_data_request)
        data_frame.index.name = 'Date'

        return data_frame
Example #10
0
    def get_fx_forward_points(self,
                              start,
                              end,
                              cross,
                              tenor,
                              cut="BGN",
                              data_source="bloomberg",
                              cache_algo="internet_load_return"):
        """Gets the forward points for a particular tenor and currency

        Parameters
        ----------
        start : DateTime
            Start date
        end : DateTime
            End date
        cross : str or list(str)
            FX crosses for which we want to download forward points
        tenor : str or list(str)
            Tenor of forward points (None uses the default tenors in
            constants)
        cut : str
            Closing time of the market data
        data_source : str
            data_source of the market data eg. bloomberg
        cache_algo : str
            Caching scheme for the data

        Returns
        -------
        pd.DataFrame
            Contains forward points
        """

        market_data_generator = self._market_data_generator

        if tenor is None:
            tenor = constants.fx_forwards_tenor

        if isinstance(cross, str): cross = [cross]
        if isinstance(tenor, str): tenor = [tenor]

        # Tickers are often different on Bloomberg for forwards/depos vs vol,
        # so want consistency so 12M is always 1Y
        tenor = [x.replace('1Y', '12M') for x in tenor]

        tickers = []

        for cr in cross:
            for tn in tenor:
                tickers.append(cr + tn)

        market_data_request = MarketDataRequest(start_date=start,
                                                finish_date=end,
                                                data_source=data_source,
                                                category='fx-forwards',
                                                freq='daily',
                                                cut=cut,
                                                tickers=tickers,
                                                fields=['close'],
                                                cache_algo=cache_algo,
                                                environment='backtest')

        data_frame = market_data_generator.fetch_market_data(
            market_data_request)

        # Map 12M back to 1Y in the output columns for consistency
        data_frame.columns = [
            x.replace('12M', '1Y') for x in data_frame.columns
        ]
        data_frame.index.name = 'Date'

        return data_frame
Example #11
0
    def get_fx_cross(self,
                     start,
                     end,
                     cross,
                     cut="NYC",
                     data_source="bloomberg",
                     freq="intraday",
                     cache_algo='internet_load_return',
                     type='spot',
                     environment='backtest',
                     fields=['close']):
        """Gets FX data for one or more crosses, downloading each cross
        separately and outer-joining the results into a single DataFrame.

        Parameters
        ----------
        start : DateTime
            start date of request
        end : DateTime
            end date of request
        cross : str or list(str)
            FX cross(es) to download
        cut : str
            closing time of the market data
        data_source : str
            data source eg. bloomberg
        freq : str
            'intraday', 'daily' or 'tick'
        cache_algo : str
            caching scheme for the data
        type : str
            type of price eg. 'spot'
        environment : str
            data environment eg. backtest
        fields : list(str)
            fields to download eg. ['close']

        Returns
        -------
        pandas.DataFrame
        """

        # Tick style sources are delegated to the tick specific path
        # (tick only supports spot, hence type='spot' is hard-coded)
        if data_source == "gain" or data_source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start,
                                          end,
                                          cross,
                                          cut=cut,
                                          data_source=data_source,
                                          cache_algo=cache_algo,
                                          type='spot',
                                          fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []

        # Create one MarketDataRequest per cross
        for cr in cross:
            market_data_request = MarketDataRequest(freq_mult=1,
                                                    cut=cut,
                                                    fields=['close'],
                                                    freq=freq,
                                                    cache_algo=cache_algo,
                                                    start_date=start,
                                                    finish_date=end,
                                                    data_source=data_source,
                                                    environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday

            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # Depends on the nature of operation as to whether we should use threading or multiprocessing library
        # BUGFIX: compare with '==' rather than 'is' - string identity is an
        # interpreter implementation detail, not equality
        if constants.market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # Most of the time is spend waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocess library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocess import Pool

        thread_no = constants.market_thread_no['other']

        if market_data_request_list[
                0].data_source in constants.market_thread_no:
            thread_no = constants.market_thread_no[
                market_data_request_list[0].data_source]

        # Fudge, issue with multithreading and accessing HDF5 files
        # if self._market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #    thread_no = 0
        thread_no = 0

        if (thread_no > 0):
            pool = Pool(thread_no)

            # Open the market data downloads in their own threads and return the results
            df_list = pool.map_async(self._get_individual_fx_cross,
                                     market_data_request_list).get()

            data_frame_agg = self._calculations.iterative_outer_join(df_list)

            # data_frame_agg = self._calculations.pandas_outer_join(result.get())

            # Best-effort shutdown of the pool; narrowed from a bare except
            try:
                pool.close()
                pool.join()
            except Exception:
                pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self._calculations.pandas_outer_join(
                data_frame_agg)

        # Strip rows where every column is nan
        data_frame_agg = data_frame_agg.dropna(how='all')

        return data_frame_agg
Example #12
0
    def get_fx_implied_vol(self,
                           start,
                           end,
                           cross,
                           tenor,
                           cut="BGN",
                           data_source="bloomberg",
                           part="V",
                           cache_algo="internet_load_return",
                           environment='backtest'):
        """Get implied vol for specified cross, tenor and part of surface. By default we use Bloomberg, but we could
        use any data provider for which we have vol tickers.

        Note, that for Bloomberg not every point will be quoted for each dataset (typically, BGN will have more points
        than for example LDN)

        Parameters
        ----------
        start : datetime
            start date of request
        end : datetime
            end date of request
        cross : str
            FX cross
        tenor : str
            tenor of implied vol (None uses the default tenors in constants)
        cut : str
            closing time of data (the '10AM' cut is estimated from LDN/TOK
            closes where actual 10AM data is missing - see below)
        data_source : str
            data_source of market data eg. bloomberg
        part : str
            part of vol surface eg. V for ATM implied vol, 25R 25 delta risk reversal
            (None uses the default parts in constants)
        cache_algo : str
            caching scheme for the data
        environment : str
            data environment eg. backtest

        Return
        ------
        pd.DataFrame
        """

        market_data_generator = self._market_data_generator

        # Fall back to project-wide defaults when tenor/part not supplied
        if tenor is None:
            tenor = constants.fx_vol_tenor

        if part is None:
            part = constants.fx_vol_part

        # Vol tickers of the form cross + part + tenor
        tickers = self.get_labels(cross, part, tenor)

        market_data_request = MarketDataRequest(start_date=start,
                                                finish_date=end,
                                                data_source=data_source,
                                                category='fx-implied-vol',
                                                freq='daily',
                                                cut=cut,
                                                tickers=tickers,
                                                fields=['close'],
                                                cache_algo=cache_algo,
                                                environment=environment)

        data_frame = market_data_generator.fetch_market_data(
            market_data_request)
        # data_frame.index.name = 'Date'

        # Special case for 10AM NYC cut
        # - get some historical 10AM NYC data (only available on BBG for a few years, before 2007)
        # - fill the rest with a weighted average of TOK/LDN closes
        if cut == "10AM":
            # Where we have actual 10am NY data use that & overwrite earlier estimated data (next)
            vol_data_10am = data_frame

            # As for most dates we probably won't have 10am data, so drop rows where there's no data at all
            # Can have the situation where some data won't be there (eg. longer dated illiquid tenors)
            if vol_data_10am is not None:
                vol_data_10am = vol_data_10am.dropna(
                    how='all')  # Only have limited ON 10am cut data

            # Now get LDN and TOK vol data to fill any gaps
            # (recursive calls with a non-10AM cut, so they won't recurse again)
            vol_data_LDN = self.get_fx_implied_vol(start=start,
                                                   end=end,
                                                   cross=cross,
                                                   tenor=tenor,
                                                   data_source=data_source,
                                                   cut='LDN',
                                                   part=part,
                                                   cache_algo=cache_algo)

            vol_data_TOK = self.get_fx_implied_vol(start=start,
                                                   end=end,
                                                   cross=cross,
                                                   tenor=tenor,
                                                   data_source=data_source,
                                                   cut='TOK',
                                                   part=part,
                                                   cache_algo=cache_algo)

            # vol_data_LDN.index = pandas.DatetimeIndex(vol_data_LDN.index)
            # vol_data_TOK.index = pandas.DatetimeIndex(vol_data_TOK.index)

            old_cols = vol_data_LDN.columns

            # Suffix the columns so the two datasets can be joined side by side
            vol_data_LDN.columns = vol_data_LDN.columns.values + "LDN"
            vol_data_TOK.columns = vol_data_TOK.columns.values + "TOK"

            data_frame = vol_data_LDN.join(vol_data_TOK, how='outer')

            # Create very naive average of LDN and TOK to estimate 10am NY value because we often don't have this data
            # Note, this isn't perfect, particularly on days where you have payrolls data, and we're looking at ON data
            # Weighting is 1 part LDN to 3 parts TOK
            for col in old_cols:
                data_frame[col] = (1 * data_frame[col + "LDN"] +
                                   3 * data_frame[col + "TOK"]) / 4

                data_frame.pop(col + "LDN")
                data_frame.pop(col + "TOK")

            # Get TOK/LDN vol data before 10am and after 10am (10am data is only available for a few years)
            # If we have no original 10am data don't bother
            if vol_data_10am is not None:
                if not (vol_data_10am.empty):
                    # Splice: estimated data before/after the window of real
                    # 10am observations, real data inside it
                    pre_vol_data = data_frame[
                        data_frame.index < vol_data_10am.index[0]]
                    post_vol_data = data_frame[
                        data_frame.index > vol_data_10am.index[-1]]

                    # NOTE(review): DataFrame.append was removed in pandas 2.x
                    # - this would need pd.concat; confirm the pandas version
                    # pinned by this project
                    data_frame = (pre_vol_data.append(vol_data_10am)
                                  ).append(post_vol_data)

            # data_frame.index = pandas.to_datetime(data_frame.index)

        return data_frame
    def fetch_single_time_series(self, md_request):
        """Fetches a single time series from the data vendor, first dropping
        any tickers whose contracts expired before the requested start date.

        Parameters
        ----------
        md_request : MarketDataRequest
            contains various properties describing the time series to be
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame or None
            None when every ticker was filtered out or nothing was returned
        """

        md_request = MarketDataRequest(md_request=md_request)

        # Only includes those tickers have not expired yet!
        start_date = pd.Timestamp(md_request.start_date).date()

        current_date = pd.Timestamp(datetime.datetime.utcnow().date())

        tickers = md_request.tickers
        vendor_tickers = md_request.vendor_tickers

        expiry_date = pd.Timestamp(md_request.expiry_date)

        config = ConfigManager().get_instance()

        # In many cases no expiry is defined so skip them
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(
                    md_request.data_source, tickers[i])
            except Exception:
                # Best-effort: no expiry configured for this ticker
                pass

            if expiry_date is not None:
                expiry_date = pd.Timestamp(expiry_date)

                if not pd.isna(expiry_date):
                    # Use pandas Timestamp, a bit more robust with weird dates
                    # (can fail if comparing date vs datetime)
                    # if the expiry is before the start date of our download
                    # don"t bother downloading this ticker
                    if expiry_date < start_date:
                        tickers[i] = None

                    # Special case for futures-contracts which are intraday
                    # avoid downloading if the expiry date is very far in the
                    # past
                    # (we need this before there might be odd situations where
                    # we run on an expiry date, but still want to get
                    # data right till expiry time)
                    if md_request.category == "futures-contracts" \
                            and md_request.freq == "intraday" \
                            and self._days_expired_intraday_contract_download \
                                > 0:

                        if expiry_date + pd.Timedelta(
                                days=
                                self._days_expired_intraday_contract_download) \
                                < current_date:
                            tickers[i] = None

                    # Keep vendor tickers aligned with the (filtered) tickers
                    if vendor_tickers is not None and tickers[i] is None:
                        vendor_tickers[i] = None

        # 'is not None' rather than '!= None' (identity is the idiom)
        md_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            md_request.vendor_tickers = [
                e for e in vendor_tickers if e is not None]

        df_single = None

        if len(md_request.tickers) > 0:
            df_single = self.get_data_vendor(
                md_request).load_ticker(md_request)

        if df_single is not None:
            if not df_single.empty:
                df_single.index.name = "Date"

                # Will fail for DataFrames which includes dates/strings
                # eg. futures contract names
                df_single = Calculations().convert_to_numeric_dataframe(
                    df_single)

                if md_request.freq == "second":
                    # NOTE(review): resample("1s") returns a Resampler, not a
                    # DataFrame - likely needs an aggregation eg. .last();
                    # behavior kept as-is, confirm intended usage
                    df_single = df_single.resample("1s")

        return df_single
    def download_daily(self, md_request):
        """Loads daily time series from specified data provider

        Splits the request into smaller per-thread requests when the data
        source supports multi-threaded download; flat files (.csv/.h5/
        .parquet/.zip), data engines and single-threaded sources are fetched
        in one call.

        Parameters
        ----------
        md_request : MarketDataRequest
            contains various properties describing time series to fetched, 
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        key = MarketDataRequest().create_category_key(
            md_request=md_request)

        # Certain categories are forced to download sequentially in smaller
        # calls instead of multi-threaded
        is_key_overriden = any(
            k in key
            for k in constants.override_multi_threading_for_categories)

        # By default use the thread count configured for "other" sources
        thread_no = constants.market_thread_no["other"]

        data_source_str = str(md_request.data_source)

        # BUG FIX: membership was tested with str(data_source) but the dict
        # was indexed with the raw object, raising KeyError for non-string
        # data sources - use the stringified key for both
        if data_source_str in constants.market_thread_no:
            thread_no = constants.market_thread_no[data_source_str]

        # Flat files and data engines are always fetched in a single call.
        # Daily data does not include ticker in the key, as multiple tickers
        # live in the same file
        is_flat_file = any(
            ext in data_source_str
            for ext in (".csv", ".h5", ".parquet", ".zip"))

        if thread_no == 1 or is_flat_file \
                or md_request.data_engine is not None:
            df_agg = self.fetch_single_time_series(md_request)
        else:
            md_request_list = []

            # Guard against a non-positive group size when there are fewer
            # tickers than threads (int(...) - 1 can go negative)
            group_size = max(
                int(len(md_request.tickers) / thread_no - 1), 0)

            if group_size == 0:
                group_size = 1

            # Split up tickers into groups related to number of threads to call
            for i in range(0, len(md_request.tickers), group_size):
                md_request_single = copy.copy(md_request)
                md_request_single.tickers = \
                    md_request.tickers[i:i + group_size]

                if md_request.vendor_tickers is not None:
                    md_request_single.vendor_tickers = \
                        md_request.vendor_tickers[i:i + group_size]

                md_request_list.append(md_request_single)

            # Special case where we make smaller calls one after the other
            if is_key_overriden:
                df_list = [self.fetch_single_time_series(md)
                           for md in md_request_list]

                df_agg = self._calculations.join(df_list, how="outer")
            else:
                df_agg = self.fetch_group_time_series(
                    md_request_list)

        return df_agg
Example #15
0
    def load_ticker(self, md_request):
        """Retrieves market data from external data source (in this case 
        Bloomberg)

        For daily/weekly/monthly/quarterly/yearly requests, fields are split
        into "reference" fields (fetched via Bloomberg reference requests,
        BDP) and ordinary historical fields (fetched via historical requests,
        BDH), and the two result sets are recombined. For tick/intraday
        requests only the first ticker is downloaded.

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start 
            and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        constants = DataConstants()

        # Copy the request and derive the vendor ticker/field equivalent so
        # the caller's object is never mutated
        md_request = MarketDataRequest(md_request=md_request)
        md_request_vendor = self.construct_vendor_md_request(md_request)

        data_frame = None

        logger = LoggerManager().getLogger(__name__)
        logger.info("Request Bloomberg data")

        # Do we need daily or intraday data?
        if (md_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # Work out the fields which need to be downloaded via Bloomberg ref request (BDP) and
            # those that can be downloaded via Historical request (BDH)
            ref_fields = []
            ref_vendor_fields = []

            # Get user defined list of BBG fields/vendor fields which need to
            # be downloaded by BDP
            bbg_ref_fields = list(constants.bbg_ref_fields.keys())
            bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

            # A field is "reference" if either its pretty name or its vendor
            # name appears in the configured BDP list
            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] in bbg_ref_fields \
                        or md_request_vendor.fields[
                    i] in bbg_ref_vendor_fields:
                    ref_fields.append(md_request.fields[i])
                    ref_vendor_fields.append(md_request_vendor.fields[i])

            non_ref_fields = []
            non_ref_vendor_fields = []

            # Everything else goes through the historical (BDH) path
            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] not in bbg_ref_fields \
                        and md_request_vendor.fields[
                    i] not in bbg_ref_vendor_fields:
                    non_ref_fields.append(md_request.fields[i])
                    non_ref_vendor_fields.append(md_request_vendor.fields[i])

            # For certain cases, need to use ReferenceDataRequest
            # eg. for events times/dates, last tradeable date fields (when specified)
            if len(ref_fields) > 0:

                # Careful: make sure you copy the market data request object
                # (when threading, altering that can
                # cause concurrency issues!)
                old_fields = copy.deepcopy(md_request.fields)
                old_vendor_fields = copy.deepcopy(md_request_vendor.fields)

                # md_request = MarketDataRequest(md_request=md_request_copy)

                # Temporarily narrow the request to the reference fields only
                md_request.fields = ref_fields
                md_request.vendor_fields = ref_vendor_fields
                md_request_vendor = self.construct_vendor_md_request(
                    md_request)

                # Just select those reference fields to download via reference
                datetime_data_frame = self.get_reference_data(
                    md_request_vendor, md_request)

                # Download all the other event or non-ref fields
                # (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(non_ref_fields) > 0:

                    md_request.fields = non_ref_fields
                    md_request.vendor_fields = non_ref_vendor_fields
                    md_request_vendor = self.construct_vendor_md_request(
                        md_request)

                    events_data_frame = self.get_daily_data(
                        md_request, md_request_vendor)

                    # Place the historical and reference results side-by-side,
                    # then restore the historical index as the frame's index
                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pd.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                # Restore the full field lists so later logic sees the
                # original request
                md_request.fields = copy.deepcopy(old_fields)
                md_request_vendor.fields = copy.deepcopy(old_vendor_fields)

            # For all other daily/monthly/quarter data, we can use
            # HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(md_request, md_request_vendor)

                # if data_frame is not None:
                #     # Convert fields with release-dt to dates (special case!) and assume everything else numerical
                #     for c in data_frame.columns:
                #         try:
                #             if 'release-dt' in c:
                #                 data_frame[c] = (data_frame[c]).astype('int').astype(str).apply(
                #                         lambda x: pd.to_datetime(x, format='%Y%m%d'))
                #             else:
                #                 data_frame[c] = pd.to_numeric(data_frame[c])
                #         except:
                #             pass

        # Assume one ticker only for intraday data and use IntradayDataRequest
        # to Bloomberg
        if (md_request.freq
                in ['tick', 'intraday', 'second', 'minute', 'hourly']):
            # NOTE: only the FIRST ticker of the request is downloaded here
            md_request_vendor.tickers = \
            md_request_vendor.tickers[0]

            if md_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(md_request_vendor)
            else:
                data_frame = self.download_intraday(md_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        logger.info("No tickers returned for: " +
                                    md_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                # Normalise the index to UTC: tz_localize raises when the
                # index is already tz-aware, in which case convert instead
                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                # Prefix each column with the (first) requested ticker,
                # eg. "EURUSD.close"
                cols = md_request.tickers[0] + "." + cols
                data_frame.columns = cols

        logger.info("Completed request from Bloomberg.")

        return data_frame
Example #16
0
    def fetch_single_time_series(self, market_data_request):
        """Fetches a time series for a single request, dropping expired
        tickers beforehand

        Tickers whose expiry date falls before the requested start date (or,
        for intraday futures-contracts, more than
        ``self.days_expired_intraday_contract_download`` days before today)
        are removed from the request before calling the data vendor.

        Parameters
        ----------
        market_data_request : MarketDataRequest
            describes the time series to be fetched (tickers, dates, fields
            etc.)

        Returns
        -------
        pandas.DataFrame or None
            None when no tickers remain after expiry filtering or the vendor
            returns nothing
        """

        market_data_request = MarketDataRequest(md_request=market_data_request)

        # Only include those tickers which have not expired yet!
        start_date = pandas.Timestamp(market_data_request.start_date).date()

        import datetime

        current_date = datetime.datetime.utcnow().date()

        from datetime import timedelta

        tickers = market_data_request.tickers
        vendor_tickers = market_data_request.vendor_tickers

        expiry_date = market_data_request.expiry_date

        config = ConfigManager().get_instance()

        # In many cases no expiry is defined so skip those tickers
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(
                    market_data_request.data_source, tickers[i])
            except Exception:
                # Best effort: keep the previously seen expiry (or None);
                # was a bare except, which also swallowed KeyboardInterrupt
                pass

            if expiry_date is not None:
                expiry_date = pandas.Timestamp(expiry_date).date()

                # Use pandas Timestamp, a bit more robust with weird dates
                # (can fail if comparing date vs datetime). If the expiry is
                # before the start date of our download, don't bother
                # downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # Special case for futures-contracts which are intraday:
                # avoid downloading if the expiry date is very far in the past
                # (we need this before there might be odd situations where we
                # run on an expiry date, but still want to get data right
                # till expiry time)
                if market_data_request.category == 'futures-contracts' \
                        and market_data_request.freq == 'intraday' \
                        and self.days_expired_intraday_contract_download > 0:

                    if expiry_date + timedelta(
                            days=self.days_expired_intraday_contract_download
                    ) < current_date:
                        tickers[i] = None

                if vendor_tickers is not None and tickers[i] is None:
                    vendor_tickers[i] = None

        market_data_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            market_data_request.vendor_tickers = [
                e for e in vendor_tickers if e is not None
            ]

        data_frame_single = None

        if len(market_data_request.tickers) > 0:
            data_frame_single = self.get_data_vendor(
                market_data_request.data_source).load_ticker(
                    market_data_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # Will fail for dataframes which include dates/strings
                # (eg. futures contract names)
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    self.logger.warning('Could not convert to float')

                if market_data_request.freq == "second":
                    # NOTE(review): resample("1s") without an aggregation
                    # returns a Resampler object in modern pandas - an
                    # aggregation (eg. .mean()) may be needed; confirm
                    # against the pandas version in use
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single
Example #17
0
    def download_daily(self, market_data_request):
        """Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched,
            including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        category_key = MarketDataRequest().create_category_key(
            market_data_request)

        data_constants = DataConstants()

        # Some categories are forced to download sequentially in smaller
        # calls rather than multi-threaded
        is_key_overriden = any(
            category in category_key
            for category
            in data_constants.override_multi_threading_for_categories)

        # Thread count for this source, falling back to the generic entry
        thread_map = data_constants.market_thread_no
        thread_no = thread_map.get(market_data_request.data_source,
                                   thread_map['other'])

        # Single-threaded sources fetch everything in one call; daily data
        # does not include ticker in the key, as multiple tickers sit in the
        # same file
        if thread_no == 1:
            return self.fetch_single_time_series(market_data_request)

        n_tickers = len(market_data_request.tickers)

        # Guard against a non-positive group size when there are fewer
        # tickers than threads
        group_size = max(int(n_tickers / thread_no - 1), 0) or 1

        # Carve the request into smaller per-thread requests
        request_chunks = []

        for start in range(0, n_tickers, group_size):
            chunk = copy.copy(market_data_request)
            chunk.tickers = market_data_request.tickers[
                start:start + group_size]

            if market_data_request.vendor_tickers is not None:
                chunk.vendor_tickers = market_data_request.vendor_tickers[
                    start:start + group_size]

            request_chunks.append(chunk)

        if is_key_overriden:
            # Overridden categories: issue the smaller calls one after
            # another and outer-join the results
            frames = [self.fetch_single_time_series(chunk)
                      for chunk in request_chunks]

            data_frame_agg = self.calculations.pandas_outer_join(frames)
        else:
            data_frame_agg = self.fetch_group_time_series(request_chunks)

        return data_frame_agg