Example #1
class FXCrossFactory(object):
    """Generates FX spot time series and FX total return time series (assuming we already have
    total return indices available from xxxUSD form) from underlying series. Can also produce cross rates from the USD
    crosses.

    """
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        self.cache = {}

        self.calculations = Calculations()
        self.market_data_generator = market_data_generator


    def get_fx_cross_tick(self,
                          start,
                          end,
                          cross,
                          cut="NYC",
                          data_source="dukascopy",
                          cache_algo='internet_load_return',
                          type='spot',
                          environment='backtest',
                          fields=['bid', 'ask']):

        if isinstance(cross, str):
            cross = [cross]

        market_data_request = MarketDataRequest(
            gran_freq="tick",
            freq_mult=1,
            freq='tick',
            cut=cut,
            fields=['bid', 'ask', 'bidv', 'askv'],
            cache_algo=cache_algo,
            environment=environment,
            start_date=start,
            finish_date=end,
            data_source=data_source,
            category='fx')

        market_data_generator = self.market_data_generator
        data_frame_agg = None

        for cr in cross:

            if type == 'spot':
                market_data_request.tickers = cr

                cross_vals = market_data_generator.fetch_market_data(
                    market_data_request)

                # if the user only wants 'close', compute it as the bid/ask mid
                if fields == ['close']:
                    cross_vals = cross_vals[
                        [cr + '.bid', cr + '.ask']].mean(axis=1).to_frame(
                        cr + '.close')
                else:
                    filter = Filter()

                    filter_columns = [cr + '.' + f for f in fields]
                    cross_vals = filter.filter_time_series_by_columns(
                        filter_columns, cross_vals)

                if data_frame_agg is None:
                    data_frame_agg = cross_vals
                else:
                    data_frame_agg = data_frame_agg.join(cross_vals,
                                                         how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg

    def get_fx_cross(self,
                     start,
                     end,
                     cross,
                     cut="NYC",
                     data_source="bloomberg",
                     freq="intraday",
                     cache_algo='internet_load_return',
                     type='spot',
                     environment='backtest',
                     fields=['close']):

        if data_source == "gain" or data_source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start,
                                          end,
                                          cross,
                                          cut=cut,
                                          data_source=data_source,
                                          cache_algo=cache_algo,
                                          type='spot',
                                          fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []
        freq_list = []
        type_list = []

        for cr in cross:
            market_data_request = MarketDataRequest(freq_mult=1,
                                                    cut=cut,
                                                    fields=['close'],
                                                    freq=freq,
                                                    cache_algo=cache_algo,
                                                    start_date=start,
                                                    finish_date=end,
                                                    data_source=data_source,
                                                    environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday

            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # the nature of the operation determines whether to use the threading
        # or the multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            # most of the time is spent waiting for the data vendor (eg.
            # Bloomberg) to return, so threads are usually sufficient
            from multiprocessing.dummy import Pool
        else:
            # must use the multiprocessing_on_dill library, otherwise objects
            # can't be pickled correctly (note: currently not very stable)
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in \
                DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        # fudge, issue with multithreading and accessing HDF5 files
        # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #    thread_no = 0

        if (thread_no > 0):
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            data_frame_agg = self.calculations.iterative_outer_join(
                pool.map_async(self._get_individual_fx_cross,
                               market_data_request_list).get())

            # data_frame_agg = self.calculations.pandas_outer_join(result.get())

            try:
                pool.close()
                pool.join()
            except Exception:
                pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_agg)

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()

        # self.speed_cache.put_dataframe(key, data_frame_agg)

        return data_frame_agg

    def _get_individual_fx_cross(self, market_data_request):
        cr = market_data_request.cross
        type = market_data_request.type
        freq = market_data_request.freq

        base = cr[0:3]
        terms = cr[3:6]

        if (type == 'spot'):
            # non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # download base USD cross
                market_data_request.tickers = base_USD
                market_data_request.category = 'fx'

                if base_USD + '.close' in self.cache:
                    base_vals = self.cache[base_USD + '.close']
                else:
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[base_USD + '.close'] = base_vals

                # download terms USD cross
                market_data_request.tickers = terms_USD
                market_data_request.category = 'fx'

                if terms_USD + '.close' in self.cache:
                    terms_vals = self.cache[terms_USD + '.close']
                else:
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[terms_USD + '.close'] = terms_vals

                # if quoted USD/base, flip to get base/USD
                if base_USD[0:3] == 'USD':
                    if base + 'USD' + '.close' in self.cache:
                        base_vals = self.cache[base + 'USD' + '.close']
                    else:
                        base_vals = 1 / base_vals
                        self.cache[base + 'USD' + '.close'] = base_vals

                # if quoted USD/terms, flip to get terms/USD
                if terms_USD[0:3] == 'USD':
                    if terms + 'USD' + '.close' in self.cache:
                        terms_vals = self.cache[terms + 'USD' + '.close']
                    else:
                        terms_vals = 1 / terms_vals
                        self.cache[terms + 'USD' + '.close'] = terms_vals

                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']

                cross_vals = base_vals.div(terms_vals, axis='index')
                cross_vals.columns = [cr + '.close']

                base_vals.columns = [base_USD + '.close']
                terms_vals.columns = [terms_USD + '.close']
            else:
                # if base == 'USD': non_USD = terms
                # if terms == 'USD': non_USD = base

                correct_cr = self.fxconv.correct_notation(cr)

                market_data_request.tickers = correct_cr
                market_data_request.category = 'fx'

                if correct_cr + '.close' in self.cache:
                    cross_vals = self.cache[correct_cr + '.close']
                else:
                    cross_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)

                    # flip if not convention
                    if (correct_cr != cr):
                        if cr + '.close' in self.cache:
                            cross_vals = self.cache[cr + '.close']
                        else:
                            cross_vals = 1 / cross_vals
                            self.cache[cr + '.close'] = cross_vals

                    self.cache[correct_cr + '.close'] = cross_vals

                cross_vals.columns = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'daily':
                # download base USD cross
                market_data_request.tickers = base + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    x = 0

                # download terms USD cross
                market_data_request.tickers = terms + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    pass

                base_rets = self.calculations.calculate_returns(base_vals)
                terms_rets = self.calculations.calculate_returns(terms_vals)

                cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # the first return of a time series will be NaN, since we don't know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self.calculations.create_mult_index(cross_rets)
                cross_vals.columns = [cr + '-tot.close']

            elif freq == 'intraday':
                self.logger.info(
                    'Total return calculation for intraday data not '
                    'implemented yet')
                return None

        return cross_vals
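
For context, here is a minimal usage sketch (not part of the original source): it assumes a working MarketDataGenerator with valid data source credentials, and shows how a non-USD cross such as EURGBP would be requested. Internally, FXCrossFactory derives it from the two USD legs, i.e. EURGBP = EURUSD / GBPUSD, after flipping any USD/xxx quotes.

    # Minimal usage sketch (assumes a configured MarketDataGenerator and
    # valid Bloomberg credentials; illustrative only)
    from findatapy.market import MarketDataGenerator

    fx_factory = FXCrossFactory(market_data_generator=MarketDataGenerator())

    # EURGBP is a non-USD cross, so it is derived as EURUSD / GBPUSD
    df = fx_factory.get_fx_cross(
        start='01 Jan 2016', end='01 Feb 2016', cross=['EURGBP'],
        data_source='bloomberg', freq='daily', fields=['close'])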
Example #2
    # in the config file, we can use keywords 'open', 'high', 'low', 'close' and 'volume' for alphavantage data

    # download equities data from alphavantage
    md_request = MarketDataRequest(
        start_date="01 Jan 2002",  # start date
        finish_date="05 Feb 2017",  # finish date
        data_source='alphavantage',  # use alphavantage as data source
        tickers=[
            'Apple', 'Citigroup', 'Microsoft', 'Oracle', 'IBM', 'Walmart',
            'Amazon', 'UPS', 'Exxon'
        ],  # ticker (findatapy)
        fields=['close'],  # which fields to download
        vendor_tickers=[
            'aapl', 'c', 'msft', 'orcl', 'ibm', 'wmt', 'amzn', 'ups', 'xom'
        ],  # ticker (alphavantage)
        vendor_fields=['Close'],  # which alphavantage fields to download
        cache_algo='internet_load_return')

    logger.info("Load data from alphavantage directly")
    df = market.fetch_market(md_request)

    logger.info(
        "Loaded data from alphavantage directly, now try reading from Redis in-memory cache"
    )
    md_request.cache_algo = 'cache_algo_return'  # change flag to cache algo so won't attempt to download via web

    df = market.fetch_market(md_request)

    logger.info("Read from Redis cache.. that was a lot quicker!")
Example #3
class DataVendorDukasCopy(DataVendor):
    tick_name  = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        super(DataVendor, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)
        self.config = ConfigManager()

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """Retrieves market data from an external data source (in this case Dukascopy)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc.

        Returns
        -------
        DataFrame
        """

        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if market_data_request.freq in ['daily', 'weekly', 'monthly',
                                        'quarterly', 'yearly', 'intraday',
                                        'minute', 'hourly']:
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (MarketDataGenerator only calls one ticker at a time)
        if market_data_request.freq in ['tick']:
            data_frame = self.get_tick(market_data_request,
                                       market_data_request_vendor)

            if data_frame is not None:
                data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        return

    def get_tick(self, market_data_request, market_data_request_vendor):

        data_frame = self.download_tick(market_data_request_vendor)

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [market_data_request_vendor.tickers[0]] * len(
                returned_fields)

            fields = self.translate_from_vendor_field(returned_fields,
                                                      market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers,
                                                        market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, market_data_request):

        symbol = market_data_request.tickers[0]

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [self.fetch_file(time, symbol) for time in
                   self.hour_range(market_data_request.start_date,
                                   market_data_request.finish_date)]

        # TODO parallel (has pickle issues)
        # time_list = self.hour_range(market_data_request.start_date, market_data_request.finish_date)
        # import multiprocessing_on_dill as multiprocessing
        #
        # pool = multiprocessing.Pool(processes=4)
        # results = [pool.apply_async(self.fetch_file, args=(time, symbol)) for time in time_list]
        # df_list = [p.get() for p in results]

        try:
            # fetch_file returns None for hours with no data; concat fails
            # if there is nothing to join
            return pandas.concat(df_list)
        except Exception:
            return None

    def fetch_file(self, time, symbol):
        # log progress once a day
        if time.hour == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(
            symbol=symbol,
            year=str(time.year).rjust(4, '0'),
            month=str(time.month).rjust(2, '0'),
            day=str(time.day).rjust(2, '0'),
            hour=str(time.hour).rjust(2, '0'))

        tick = self.fetch_tick(DataConstants().dukascopy_base_url + tick_path)

        if DataConstants().dukascopy_write_temp_tick_disk:
            out_path = DataConstants().temp_folder + "/dkticks/" + tick_path

            if not os.path.exists(os.path.dirname(out_path)):
                os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            # decompression fails for empty/missing files (eg. weekends)
            return None

    def fetch_tick(self, tick_url):
        i = 0
        tick_request = None

        # try up to 5 times to download
        while i < 5:
            try:
                tick_request = requests.get(tick_url)
                i = 5
            except Exception:
                i = i + 1

        if tick_request is None:
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        with open(out_path, "wb+") as data_file:
            data_file.write(content)

    def chunks(self, data, n):
        if n < 1:
            n = 1

        return [data[i:i + n] for i in range(0, len(data), n)]

    def retrieve_df(self, data, symbol, epoch):
        date, tuples = self.parse_tick_data(data, epoch)

        df = pandas.DataFrame(data=tuples,
                              columns=['temp', 'ask', 'bid', 'askv', 'bidv'],
                              index=date)
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have a different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without a decimal point
        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        delta_t = end_date - start_date
        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)

        for n in range(int(delta_hours)):
            # timedelta(days, seconds, microseconds, milliseconds, minutes, hours)
            yield start_date + timedelta(0, 0, 0, 0, 0, n)

    def parse_tick_data(self, data, epoch):
        import struct

        # each record is 20 bytes: time offset in milliseconds from the start
        # of the hour (uint32), ask and bid prices as integers with an implied
        # decimal point (uint32), then ask and bid volumes (float32)
        chunks_list = self.chunks(data, 20)
        parsed_list = []
        date = []

        # note: Numba can speed up for loops
        # (named tuples were also tried here, but proved slow)
        for row in chunks_list:
            d = struct.unpack(">LLLff", row)

            # the first field is the millisecond offset from the hour 'epoch'
            date.append(epoch + timedelta(0, 0, 0, d[0]))
            parsed_list.append(d)

        return date, parsed_list

    def get_daily_data(self):
        pass
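
As a rough illustration (not from the original source), the vendor could be driven directly as below, assuming a MarketDataRequest configured for tick data and a ticker mapping in the config; in normal use MarketDataGenerator calls load_ticker on your behalf.

    # Hypothetical direct use of the Dukascopy vendor (illustrative only)
    from datetime import datetime

    md_request = MarketDataRequest(
        start_date=datetime(2016, 1, 4), finish_date=datetime(2016, 1, 4, 4),
        freq='tick', data_source='dukascopy', category='fx',
        tickers=['EURUSD'], vendor_tickers=['EURUSD'],
        fields=['bid', 'ask'])

    dukas = DataVendorDukasCopy()
    df = dukas.load_ticker(md_request)  # tick DataFrame, or None on failure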
Example #4
class DataVendorQuandl(DataVendor):

    def __init__(self):
        super(DataVendorQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(market_data_request_vendor)

        if data_frame is None or len(data_frame.index) == 0: return None

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-')
                               .replace('.', '-').replace('--', '-')
                               for x in returned_tickers]
            # special case for close
            returned_fields = [x.replace('value', 'close')
                               for x in returned_fields]

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            fields = self.translate_from_vendor_field(returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, market_data_request):
        trials = 0

        data_frame = None

        # try up to 5 times to download
        while trials < 5:
            try:
                data_frame = Quandl.get(
                    market_data_request.tickers,
                    authtoken=DataConstants().quandl_api_key,
                    trim_start=market_data_request.start_date,
                    trim_end=market_data_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) +
                                 " to download from Quandl")

        if trials == 5:
            self.logger.error(
                "Couldn't download from Quandl after several attempts!")

        return data_frame
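
For reference, a hypothetical request that would be routed to this vendor is sketched below; the Quandl code shown is illustrative only, and market is assumed to be set up as in Example #2.

    # Hypothetical Quandl request (ticker/vendor_ticker values are examples)
    md_request = MarketDataRequest(
        start_date="01 Jan 2010", finish_date="01 Jan 2015",
        freq="daily", data_source="quandl",
        tickers=["Brent"], vendor_tickers=["FRED/DCOILBRENTEU"],
        fields=["close"], vendor_fields=["close"])

    df = market.fetch_market(md_request)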
Example #5
    # and "volume" for alphavantage data

    # Download equities data from yahoo
    md_request = MarketDataRequest(
        start_date="01 Jan 2002",  # start date
        finish_date="05 Feb 2017",  # finish date
        data_source="yahoo",  # use alphavantage as data source
        tickers=["Apple", "Citigroup", "Microsoft", "Oracle", "IBM", "Walmart",
                 "Amazon", "UPS", "Exxon"],  # ticker (findatapy)
        fields=["close"],  # which fields to download
        vendor_tickers=["aapl", "c", "msft", "orcl", "ibm", "wmt", "amzn",
                        "ups", "xom"],  # ticker (yahoo)
        vendor_fields=["Close"],  # which yahoo fields to download
        cache_algo="internet_load_return")

    logger.info("Load data from yahoo directly")
    df = market.fetch_market(md_request)

    print(df)

    logger.info(
        "Loaded data from yahoo directly, now try reading from Redis "
        "in-memory cache")
    md_request.cache_algo = "cache_algo_return"  # change flag to cache algo
    # so won"t attempt to download via web

    df = market.fetch_market(md_request)
    print(df)

    logger.info("Read from Redis cache.. that was a lot quicker!")