    def fetch_file(self, time, symbol):
        # hour is always 0-23, so this logs once per day, at midnight
        if time.hour == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(
                symbol = symbol,
                year = str(time.year).rjust(4, '0'),
                month = str(time.month).rjust(2, '0'),
                day = str(time.day).rjust(2, '0'),
                hour = str(time.hour).rjust(2, '0')
            )

        tick = self.fetch_tick(DataConstants().dukascopy_base_url + tick_path)

        if DataConstants().dukascopy_write_temp_tick_disk:
            out_path = DataConstants().temp_folder + "/dkticks/" + tick_path

            # create the parent folder if it does not exist yet
            if not os.path.exists(os.path.dirname(out_path)):
                os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except Exception:
            # corrupt or missing data: signal failure to the caller
            return None
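
The path template above works by zero-padding each date component before substitution. A minimal sketch of the same idea, where the template string is a made-up stand-in for self.tick_name rather than the real Dukascopy layout:

import datetime

tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"  # hypothetical template
time = datetime.datetime(2020, 1, 15, 7)

path = tick_name.format(
    symbol="EURUSD",
    year=str(time.year).rjust(4, '0'),
    month=str(time.month).rjust(2, '0'),
    day=str(time.day).rjust(2, '0'),
    hour=str(time.hour).rjust(2, '0'))

print(path)  # EURUSD/2020/01/15/07h_ticks.bi5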
Example #2
    def __init__(self, market_data_generator = None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator
Example #3
    def __init__(self, market_data_generator = None, md_request = None):
        if market_data_generator is None:
            if DataConstants().default_market_data_generator == "marketdatagenerator":
                from findatapy.market import MarketDataGenerator
                market_data_generator = MarketDataGenerator()
            elif DataConstants().default_market_data_generator == 'cachedmarketdatagenerator':
                # not currently implemented; reserved for future use
                from findatapy.market import CachedMarketDataGenerator
                market_data_generator = CachedMarketDataGenerator()

        self.market_data_generator = market_data_generator
        self.md_request = md_request

    def download_daily(self, market_data_request, data_vendor):
        """Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include the ticker in the key, as multiple tickers are stored in the same file

        if DataConstants().market_thread_no['other'] == 1:
            data_frame_agg = data_vendor.load_ticker(market_data_request)
        else:
            market_data_request_list = []

            # guard against a zero group size (eg. fewer tickers than
            # threads), which would otherwise break the slicing loop below
            group_size = max(
                int(
                    len(market_data_request.tickers) /
                    DataConstants().market_thread_no['other'] - 1), 0)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[
                    i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            data_frame_agg = self.fetch_group_time_series(
                market_data_request_list)

        key = self.create_category_key(market_data_request)
        fname = self.create_cache_file_name(key)
        # cache in memory (ok for daily data)
        self._time_series_cache[fname] = data_frame_agg

        return data_frame_agg
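
The group-size arithmetic above is easier to see in isolation. A small sketch with made-up tickers and an assumed thread count:

tickers = ['EURUSD', 'GBPUSD', 'USDJPY', 'AUDUSD', 'USDCAD']  # made-up list
thread_no = 2                                                 # assumed thread count

group_size = max(int(len(tickers) / thread_no - 1), 0)  # int(5 / 2 - 1) -> 1
if group_size == 0: group_size = 1

groups = [tickers[i:i + group_size] for i in range(0, len(tickers), group_size)]
print(groups)  # five single-ticker groups, each dispatched as its own request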
Example #5
    def fetch_group_time_series(self, market_data_request_list):

        data_frame_agg = None

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = SwimPool().create_pool(
                thread_technique=DataConstants().market_thread_technique,
                thread_no=thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            # for debugging!
            # import pickle
            # import datetime
            # pickle.dump(data_frame_group, open(str(datetime.datetime.now()).replace(':', '-').replace(' ', '-').replace(".", "-") + ".p", "wb"))

            if data_frame_group:
                try:
                    data_frame_agg = self.calculations.pandas_outer_join(
                        data_frame_group)
                except Exception as e:
                    self.logger.warning(
                        'Possible overlap of columns? Have you specified the '
                        'same ticker several times? ' + str(e))

        return data_frame_agg
Example #6
    def __init__(self, multiprocessing_library = None):
        self._pool = None

        if multiprocessing_library is None:
            multiprocessing_library = DataConstants().multiprocessing_library

        self._multiprocessing_library = multiprocessing_library
        self._thread_technique = 'na'

        if multiprocessing_library == 'multiprocess':
            try:
                import multiprocess
                multiprocess.freeze_support()
            except ImportError:
                pass
        elif multiprocessing_library == 'multiprocessing_on_dill':
            try:
                import multiprocessing_on_dill
                multiprocessing_on_dill.freeze_support()
            except ImportError:
                pass
        elif multiprocessing_library == 'multiprocessing':
            try:
                import multiprocessing
                multiprocessing.freeze_support()
            except ImportError:
                pass

    def fetch_group_time_series(self, market_data_request_list):

        data_frame_agg = None

        # the nature of the operation dictates whether to use the threading or the multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so threads suffice rather than full processes
            # must use the multiprocessing_on_dill library, otherwise objects can't be pickled correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            if data_frame_group:
                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_group)

        return data_frame_agg

    def download_daily(self, market_data_request):
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                data_frame = Quandl.get(market_data_request.tickers, authtoken=DataConstants().quandl_api_key, trim_start=market_data_request.start_date,
                                        trim_end=market_data_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) + " to download from Quandl failed, retrying...")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
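
The bounded-retry pattern above can be factored into a reusable helper. A sketch only; the backoff sleep is an embellishment that is not in the original code:

import time

def retry(fn, attempts=5, delay=1.0):
    """Call fn() up to `attempts` times, sleeping a little longer after each failure."""
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except Exception:
            if attempt == attempts:
                raise
            time.sleep(delay * attempt)  # simple linear backoff

# eg. data_frame = retry(lambda: Quandl.get(tickers, authtoken=api_key))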
Example #9
#
# Copyright 2016 Cuemacro
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and limitations under the License.
#

from findatapy.util import DataConstants
from findatapy.market.ioengine import SpeedCache

constants = DataConstants()

# from deco import *


class Market(object):
    """Higher level class which fetches market data using underlying classes such as MarketDataGenerator.

    Also contains several other classes for asset-specific instances, for example for generating FX spot time series
    or FX volatility surfaces.
    """
    def __init__(self, market_data_generator=None, md_request=None):
        if market_data_generator is None:
            if constants.default_market_data_generator == "marketdatagenerator":
                from findatapy.market import MarketDataGenerator
                market_data_generator = MarketDataGenerator()
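
Typical usage of Market might look as follows; a sketch, with illustrative request parameters:

from findatapy.market import Market, MarketDataGenerator, MarketDataRequest

market = Market(market_data_generator=MarketDataGenerator())

md_request = MarketDataRequest(start_date='01 Jan 2020', finish_date='31 Jan 2020',
                               data_source='quandl',  # illustrative values
                               tickers=['EURUSD'], fields=['close'])

df = market.fetch_market(md_request)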
Example #10
    def download_daily(self, market_data_request):
        data_frame_list = []
        data_frame_release = []

        # TODO refactor this code, a bit messy at the moment!
        for i in range(0, len(market_data_request.tickers)):
            # reset the retry counter per ticker, otherwise one failing ticker
            # would exhaust the attempts for every subsequent one
            trials = 0

            while trials < 5:
                try:
                    fred = Fred(api_key=DataConstants().fred_api_key)

                    # acceptable fields: close, actual-release, first-revision, release-date-time-full
                    if 'close' in market_data_request.fields and 'release-date-time-full' in market_data_request.fields:
                        data_frame = fred.get_series_all_releases(market_data_request.tickers[i],
                                                                  observation_start=market_data_request.start_date,
                                                                  observation_end=market_data_request.finish_date)

                        data_frame.columns = ['Date', market_data_request.tickers[i] + '.release-date-time-full',
                                              market_data_request.tickers[i] + '.close']

                        data_frame = data_frame.sort_values(by=['Date', market_data_request.tickers[i] + '.release-date-time-full'])
                        data_frame = data_frame.drop_duplicates(subset=['Date'], keep='last')
                        data_frame = data_frame.set_index(['Date'])

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)
                    elif 'close' in market_data_request.fields:

                        data_frame = fred.get_series(series_id=market_data_request.tickers[i],
                                                     observation_start=market_data_request.start_date,
                                                     observation_end=market_data_request.finish_date)

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.close']
                        data_frame_list.append(data_frame)

                    if 'first-revision' in market_data_request.fields:
                        data_frame = fred.get_series_first_revision(market_data_request.tickers[i],
                                                                    observation_start=market_data_request.start_date,
                                                                    observation_end=market_data_request.finish_date)

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.first-revision']

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)

                    if 'actual-release' in market_data_request.fields and 'release-date-time-full' in market_data_request.fields:
                        data_frame = fred.get_series_all_releases(market_data_request.tickers[i],
                                                                  observation_start=market_data_request.start_date,
                                                                  observation_end=market_data_request.finish_date)

                        data_frame.columns = ['Date', market_data_request.tickers[i] + '.release-date-time-full',
                                              market_data_request.tickers[i] + '.actual-release']

                        data_frame = data_frame.sort_values(by=['Date', market_data_request.tickers[i] + '.release-date-time-full'])
                        data_frame = data_frame.drop_duplicates(subset=['Date'], keep='first')
                        data_frame = data_frame.set_index(['Date'])

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)

                    elif 'actual-release' in market_data_request.fields:
                        data_frame = fred.get_series_first_release(market_data_request.tickers[i],
                                                                   observation_start=market_data_request.start_date,
                                                                   observation_end=market_data_request.finish_date)

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.actual-release']

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)

                    elif 'release-date-time-full' in market_data_request.fields:
                        data_frame = fred.get_series_all_releases(market_data_request.tickers[i],
                                                                  observation_start=market_data_request.start_date,
                                                                  observation_end=market_data_request.finish_date)

                        data_frame = data_frame['realtime_start']

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.release-date-time-full']

                        data_frame.index = data_frame[market_data_request.tickers[i] + '.release-date-time-full']
                        # DataFrame.sort was removed from pandas; sort on the index instead
                        data_frame = data_frame.sort_index()
                        data_frame = data_frame.drop_duplicates()

                        filter = Filter()
                        data_frame_release.append(filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame))

                    break
                except Exception:
                    trials = trials + 1
                    self.logger.info("Attempt " + str(trials) + " to download from ALFRED/FRED failed, retrying...")

            if trials == 5:
                self.logger.error("Couldn't download from ALFRED/FRED after several attempts!")

        calc = Calculations()

        data_frame1 = calc.pandas_outer_join(data_frame_list)
        data_frame2 = calc.pandas_outer_join(data_frame_release)

        data_frame = pandas.concat([data_frame1, data_frame2], axis=1)

        return data_frame
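
pandas_outer_join presumably reduces the list of frames with an outer join on their indices; the equivalent in plain pandas:

import pandas

df_a = pandas.DataFrame({'GDP.close': [1.0, 2.0]},
                        index=pandas.to_datetime(['2020-01-01', '2020-02-01']))
df_b = pandas.DataFrame({'CPI.close': [3.0]},
                        index=pandas.to_datetime(['2020-02-01']))

joined = pandas.concat([df_a, df_b], axis=1, join='outer')
print(joined)  # union of both date indices, NaN where a series has no observation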
Example #11
    def get_fx_cross(self,
                     start,
                     end,
                     cross,
                     cut="NYC",
                     source="bloomberg",
                     freq="intraday",
                     cache_algo='internet_load_return',
                     type='spot',
                     environment='backtest',
                     fields=['close']):

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start,
                                          end,
                                          cross,
                                          cut=cut,
                                          source=source,
                                          cache_algo=cache_algo,
                                          type='spot',
                                          fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []
        freq_list = []
        type_list = []

        for cr in cross:
            market_data_request = MarketDataRequest(freq_mult=1,
                                                    cut=cut,
                                                    fields=['close'],
                                                    freq=freq,
                                                    cache_algo=cache_algo,
                                                    start_date=start,
                                                    finish_date=end,
                                                    data_source=source,
                                                    environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday

            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # the nature of the operation dictates whether to use the threading or the multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so threads suffice rather than full processes
            # must use the multiprocessing_on_dill library, otherwise objects can't be pickled correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        # fudge, issue with multithreading and accessing HDF5 files
        # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #    thread_no = 0

        if thread_no > 0:
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self._get_individual_fx_cross,
                                    market_data_request_list)
            data_frame_agg = self.calculations.iterative_outer_join(
                result.get())

            # data_frame_agg = self.calculations.pandas_outer_join(result.get())

            # pool would have already been closed earlier
            # try:
            #    pool.close()
            #    pool.join()
            # except: pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_agg)

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg
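
Calling this method might look as follows; a sketch, where fx_factory stands for a hypothetical instance of the enclosing class and the parameter values are illustrative:

df = fx_factory.get_fx_cross(start='01 Jan 2020', end='31 Jan 2020',
                             cross=['EURUSD', 'GBPUSD'],
                             cut='NYC', source='bloomberg', freq='daily',
                             fields=['close'])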
Example #12
    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
Example #13
    def download_daily(self, market_data_request):
        """Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        key = MarketDataRequest().create_category_key(market_data_request)

        is_key_overriden = False

        for k in DataConstants().override_multi_threading_for_categories:
            if k in key:
                is_key_overriden = True
                break

        # by default use the 'other' thread count
        thread_no = DataConstants().market_thread_no['other']

        if market_data_request.data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request.data_source]

        # daily data does not include the ticker in the key, as multiple tickers are stored in the same file
        if thread_no == 1:
            # data_frame_agg = data_vendor.load_ticker(market_data_request)
            data_frame_agg = self.fetch_single_time_series(market_data_request)
        else:
            market_data_request_list = []

            # guard against a zero group size (eg. fewer tickers than
            # threads), which would otherwise break the slicing loop below
            group_size = max(
                int(len(market_data_request.tickers) / thread_no - 1), 0)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[
                    i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            # special case where we make smaller calls one after the other
            if is_key_overriden:

                data_frame_list = []

                for md in market_data_request_list:
                    data_frame_list.append(self.fetch_single_time_series(md))

                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_list)
            else:
                data_frame_agg = self.fetch_group_time_series(
                    market_data_request_list)

        # fname = self.create_cache_file_name(key)
        # self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg
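
The override check near the top of this method is a plain substring scan over the category key; equivalently, with made-up values:

key = 'backtest.fx.quandl.daily.NYC'  # made-up category key
overrides = ['quandl']                # made-up override list

is_key_overriden = any(k in key for k in overrides)
print(is_key_overriden)  # True: this category would be fetched in sequential calls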
Example #14
    def download_intraday_tick(self, market_data_request):
        """Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(market_data_request.tickers) == 1 or DataConstants(
        ).market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                # old_finish_date = market_data_request_single.finish_date
                #
                # market_data_request_single.finish_date = self.refine_expiry_date(market_data_request)
                #
                # if market_data_request_single.finish_date >= market_data_request_single.start_date:
                #     data_frame_single = data_vendor.load_ticker(market_data_request_single)
                # else:
                #     data_frame_single = None
                #
                # market_data_request_single.finish_date = old_finish_date
                #
                # data_frame_single = data_vendor.load_ticker(market_data_request_single)

                # fetch the single-ticker copy built above, not the full request
                data_frame_single = self.fetch_single_time_series(
                    market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

                        # # if you call for returning multiple tickers, be careful with memory considerations!
                        # if data_frame_agg is not None:
                        #     data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        # else:
                        #     data_frame_agg = data_frame_single

                # key = self.create_category_key(market_data_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            # if you call for returning multiple tickers, be careful with memory considerations!
            if data_frame_group:
                data_frame_agg = calculations.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg

        else:
            market_data_request_list = []

            # create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)
Example #15
    def auto_set_key(self):
        self.twitter = Twython(DataConstants().APP_KEY,
                               DataConstants().APP_SECRET,
                               DataConstants().OAUTH_TOKEN,
                               DataConstants().OAUTH_TOKEN_SECRET)
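
With the keys set, the Twython client is ready to use directly; a sketch, where searcher stands for a hypothetical instance of the class above and valid credentials are assumed in DataConstants:

searcher.auto_set_key()
tweets = searcher.twitter.search(q='findatapy')  # Twython's standard search call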