class LoaderQuandl(LoaderTemplate):

    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        # an empty DataFrame means the vendor returned nothing usable
        # (note: "index is []" is always False, so test emptiness properly)
        if data_frame is None or data_frame.empty: return None

        # convert from vendor to Thalesians tickers/fields
        returned_tickers = data_frame.columns

        # tidy up tickers into a format that is more easily translatable
        returned_tickers = [x.replace(' - Value', '') for x in returned_tickers]
        returned_tickers = [x.replace(' - VALUE', '') for x in returned_tickers]
        returned_tickers = [x.replace('.', '/') for x in returned_tickers]

        fields = self.translate_from_vendor_field(['close' for x in returned_tickers], time_series_request)
        tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

        ticker_combined = []

        for i in range(0, len(fields)):
            ticker_combined.append(tickers[i] + "." + fields[i])

        data_frame.columns = ticker_combined
        data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                data_frame = Quandl.get(time_series_request.tickers, authtoken=Constants().quandl_api_key, trim_start=time_series_request.start_date,
                            trim_end=time_series_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) + " to download from Quandl failed, retrying...")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
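
# The bounded-retry loop in download_daily above is a common defence around a
# flaky network call. A self-contained sketch of the same idea (the helper
# below is illustrative, not part of the library):

def download_with_retries(fetch, max_trials=5):
    # call fetch() up to max_trials times; give up and return None if all fail
    for trial in range(1, max_trials + 1):
        try:
            return fetch()
        except Exception:
            print("Attempt " + str(trial) + " of " + str(max_trials) + " failed")

    return None
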
class BBGLowLevelRef(BBGLowLevelTemplate):

    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
        self._options = []

    # populate options for Bloomberg request for asset intraday request
    def fill_options(self, time_series_request):
        self._options = OptionsBBG()

        self._options.security = time_series_request.tickers
        self._options.startDateTime = time_series_request.start_date
        self._options.endDateTime = time_series_request.finish_date
        self._options.fields = time_series_request.fields

        return self._options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0

        for securityData in list(securityDataArray.values()):
            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        data[(field_name, ticker)][index] = re.findall(r'"(.*?)"', "%s" % row)[0]

                        index = index + 1
                # else:
                    # vals.append(re.findall(r'"(.*?)"', "%s" % row)[0])
                    # print("%s = %s" % (field.name(), field.getValueAsString()))

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")
                print(errorInfo.getElementAsString("category"), ":",
                      fieldException.getElementAsString("fieldId"))

        data_frame = pandas.DataFrame(data)

        # if obsolete ticker could return no values
        if not data_frame.empty:
            data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            self.logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) + ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
            not in data_frame.columns.get_level_values(1).values):

            return data_frame.join(data_frame_slice, how="outer")

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)    # force GMT time
        self.add_override(request, 'START_DT', self._options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT', self._options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in self._options.fields:
            request.getElement("fields").appendValue(field)

        for security in self._options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request)
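
# process_message above accumulates values into a dict keyed by
# (field, ticker) tuples, then promotes those tuples into a column
# MultiIndex. A self-contained sketch of that shape (sample values made up):

import collections
import pandas

bbg_data = collections.defaultdict(dict)
bbg_data[('INDX_MEMBERS', 'UKX Index')][0] = 'AAL LN'
bbg_data[('INDX_MEMBERS', 'UKX Index')][1] = 'ABF LN'

sketch_frame = pandas.DataFrame(bbg_data)
sketch_frame.columns = pandas.MultiIndex.from_tuples(bbg_data, names=['field', 'ticker'])
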
class LoaderQuandl(LoaderTemplate):
    def __init__(self):
        super(LoaderQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(time_series_request_vendor)

        # an empty DataFrame means the vendor returned nothing usable
        # (note: "index is []" is always False, so test emptiness properly)
        if data_frame is None or data_frame.empty: return None

        # convert from vendor to Thalesians tickers/fields
        returned_tickers = data_frame.columns

        # tidy up tickers into a format that is more easily translatable
        # we can often get multiple fields returned (even if we don't ask for them!)
        # convert to lower case
        returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-')
                           for x in returned_tickers]
        returned_fields = [
            x.replace('value', 'close') for x in returned_fields
        ]  # special case for close

        returned_tickers = [x.replace('.', '/') for x in returned_tickers]
        returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

        fields = self.translate_from_vendor_field(returned_fields,
                                                  time_series_request)
        tickers = self.translate_from_vendor_ticker(
            returned_tickers, time_series_request)

        ticker_combined = []

        for i in range(0, len(fields)):
            ticker_combined.append(tickers[i] + "." + fields[i])

        data_frame.columns = ticker_combined
        data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl.")

        return data_frame

    def download_daily(self, time_series_request):
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                data_frame = Quandl.get(
                    time_series_request.tickers,
                    authtoken=Constants().quandl_api_key,
                    trim_start=time_series_request.start_date,
                    trim_end=time_series_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) +
                                 " to download from Quandl failed, retrying...")

        if trials == 5:
            self.logger.error(
                "Couldn't download from Quandl after several attempts!")

        return data_frame
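
# This version of load_ticker splits Quandl's "<code> - <field>" column
# labels into separate ticker and field lists before translation. The same
# tidy-up, in isolation (the labels are made-up samples):

returned_columns = ['FRED.DEXUSEU - Value', 'WIKI.AAPL - Adj. Close']

sample_fields = [(x.split(' - ')[1]).lower().replace(' ', '-') for x in returned_columns]
sample_fields = [x.replace('value', 'close') for x in sample_fields]
sample_tickers = [x.split(' - ')[0].replace('.', '/') for x in returned_columns]
# sample_fields  -> ['close', 'adj.-close']
# sample_tickers -> ['FRED/DEXUSEU', 'WIKI/AAPL']
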
class LightTimeSeriesFactory:
    _time_series_cache = {} # shared across all instances of object!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use: "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None: create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source, time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else: data_frame_agg = self.download_daily(time_series_request, loader)

        if('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            if loader is not None and kill_session == True: loader.kill_session()

        if(time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, do not return any time series."); return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker = None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if(isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # handle intraday ticker calls separately one by one
        for ticker in time_series_request.tickers:
            time_series_request_single = copy.copy(time_series_request)
            time_series_request_single.tickers = ticker

            if hasattr(time_series_request, 'vendor_tickers'):
                time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                ticker_cycle = ticker_cycle + 1

            # we downscale into float32, to avoid memory problems in Python (32 bit)
            # data is stored on disk as float32 anyway
            data_frame_single = loader.load_ticker(time_series_request_single)

            # if the vendor doesn't provide any data, don't attempt to append
            if data_frame_single is not None:
                if not data_frame_single.empty:
                    data_frame_single.index.name = 'Date'
                    data_frame_single = data_frame_single.astype('float32')

                    # if you call for returning multiple tickers, be careful with memory considerations!
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

            # key = self.create_category_key(time_series_request, ticker)
            # fname = self.create_cache_file_name(key)
            # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file
        data_frame_agg = loader.load_ticker(time_series_request)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'): category = time_series_request.category

        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None): key = category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else: key = category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
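
# A minimal end-to-end sketch of this factory. The TimeSeriesRequest
# constructor arguments are an assumption, mirroring the attributes that
# harvest_time_series reads and the usage in HistEconDataFactory below:

if __name__ == '__main__':
    ltsf = LightTimeSeriesFactory()

    request = TimeSeriesRequest(
        start_date='01 Jan 2014', finish_date='01 Jan 2015',
        category='fx', freq='daily', data_source='quandl', cut='LOC',
        tickers=['EURUSD'], fields=['close'],
        vendor_tickers=['FRED/DEXUSEU'], vendor_fields=['close'],
        cache_algo='internet_load_return')

    print(ltsf.harvest_time_series(request))
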
class LightTimeSeriesFactory:
    _time_series_cache = {} # shared across all instances of object!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use: "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source, time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else: data_frame_agg = self.download_daily(time_series_request, loader)

        if('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            # if loader is not None and kill_session == True: loader.kill_session()

        if(time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, do not return any time series."); return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker = None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if(isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(time_series_request.tickers) == 1 or Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

                # key = self.create_category_key(time_series_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg
        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)

    def fetch_single_time_series(self, time_series_request):
        data_frame_single = self.get_loader(time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'
                data_frame_single = data_frame_single.astype('float32')

        return data_frame_single

    def fetch_group_time_series(self, time_series_request_list):

        data_frame_agg = None

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        if Constants().time_series_factory_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        if time_series_request_list[0].data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # data_frame_group = results.get()
        # data_frame_group = results
        # data_frame_group = None

        #import multiprocessing as multiprocessing
        # close the pool and wait for the work to finish

        # processes = []

        # for x in range(0, len(time_series_request_list)):
        #    time_series_request = time_series_request_list[x]
        # processes =   [multiprocessing.Process(target = self.fetch_single_time_series,
        #                                           args = (x)) for x in time_series_request_list]

        # pool.apply_async(tsf.harvest_category, args = (category_desc, environment, freq,
        #             exclude_freq_cat, force_new_download_freq_cat, include_freq_cat))

        # Run processes
        # for p in processes: p.start()

        # Exit the completed processes
        # for p in processes: p.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file

        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(len(time_series_request.tickers) / Constants().time_series_factory_thread_no['other'] - 1)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = time_series_request.tickers[i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'): category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None): key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else: key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
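
# fetch_group_time_series and download_daily above combine two standard
# pieces: chunk the ticker list, then map the chunks over a thread pool
# (multiprocessing.dummy.Pool is backed by threads). A self-contained sketch:

from multiprocessing.dummy import Pool

def fetch_group(tickers):
    # stand-in for a per-group market data download
    return [ticker.upper() for ticker in tickers]

if __name__ == '__main__':
    all_tickers = ['eurusd', 'gbpusd', 'usdjpy', 'audusd', 'usdcad']
    group_size = 2
    groups = [all_tickers[i:i + group_size]
              for i in range(0, len(all_tickers), group_size)]

    pool = Pool(4)
    result = pool.map_async(fetch_group, groups)
    downloaded = result.get()  # [['EURUSD', 'GBPUSD'], ['USDJPY', 'AUDUSD'], ['USDCAD']]

    pool.close()
    pool.join()
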
class HistEconDataFactory:
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(Constants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(
            Constants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(
            Constants().econ_country_groups)

        self.time_series_factory = LightTimeSeriesFactory()

        # if Constants().default_time_series_factory == 'lighttimeseriesfactory':
        #     self.time_series_factory = LightTimeSeriesFactory()
        # else:
        #     self.time_series_factory = CachedTimeSeriesFactory()
        # return

    def get_economic_data_history(self,
                                  start_date,
                                  finish_date,
                                  country_group,
                                  data_type,
                                  source='fred',
                                  cache_algo="internet_load_return"):

        #vendor_country_codes = self.fred_country_codes[country_group]
        #vendor_pretty_country = self.fred_nice_country_codes[country_group]

        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group]
                                        ['Country'])

        # construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(
                self._all_econ_tickers[self._all_econ_tickers["Full Code"] ==
                                       pretty_ticker][source].values)

            if vendor_ticker == []:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg': vendor_fields = ['PX_LAST']

        time_series_request = TimeSeriesRequest(
            start_date=start_date,  # start date
            finish_date=finish_date,  # finish date
            category='economic',
            freq='daily',  # intraday data
            data_source=source,  # use Bloomberg as data source
            cut='LOC',
            tickers=pretty_tickers,
            fields=['close'],  # which fields to download
            vendor_tickers=vendor_tickers,
            vendor_fields=vendor_fields,  # which Bloomberg fields to download
            cache_algo=cache_algo)  # how to return data

        return self.time_series_factory.harvest_time_series(
            time_series_request)

    def grasp_coded_entry(self, df, index):
        df = df.loc[index:].stack()  # .loc replaces the removed pandas .ix for label-based slicing
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']

        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum([
            list(self._econ_country_codes[self._econ_country_codes["Country"]
                                          == x]['Code']) for x in countries
        ], [])

        return df
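
# A minimal usage sketch for HistEconDataFactory. The country_group and
# data_type strings depend on the CSV configuration files loaded in
# __init__, so the values below are assumptions:

if __name__ == '__main__':
    hist = HistEconDataFactory()

    df_econ = hist.get_economic_data_history(
        '01 Jan 2000', '01 Jan 2015', 'G10', 'GDP', source='fred')
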
class LightTimeSeriesFactory:
    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use: "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(
                time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if ('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            # if loader is not None and kill_session == True: loader.kill_session()

        if (time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request,
                                              data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(time_series_request.tickers) == 1 or \
                Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(
                    time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(
                                data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

                # key = self.create_category_key(time_series_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg
        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)

    def fetch_single_time_series(self, time_series_request):
        data_frame_single = self.get_loader(
            time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which includes dates
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    pass

                if time_series_request.freq == "second":
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, time_series_request_list):

        data_frame_agg = None

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        if Constants().time_series_factory_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        if time_series_request_list[0].data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[
                time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series,
                                time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # data_frame_group = results.get()
        # data_frame_group = results
        # data_frame_group = None

        #import multiprocessing as multiprocessing
        # close the pool and wait for the work to finish

        # processes = []

        # for x in range(0, len(time_series_request_list)):
        #    time_series_request = time_series_request_list[x]
        # processes =   [multiprocessing.Process(target = self.fetch_single_time_series,
        #                                           args = (x)) for x in time_series_request_list]

        # pool.apply_async(tsf.harvest_category, args = (category_desc, environment, freq,
        #             exclude_freq_cat, force_new_download_freq_cat, include_freq_cat))

        # Run processes
        # for p in processes: p.start()

        # Exit the completed processes
        # for p in processes: p.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single,
                                                             how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file

        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(
                len(time_series_request.tickers) /
                Constants().time_series_factory_thread_no['other'] - 1)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = time_series_request.tickers[
                    i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(
                time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None):
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
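
# The cache key scheme used by this third version of create_category_key
# prefixes keys with an environment, giving dot-separated names such as the
# one built below (all component values are illustrative):

environment, category, source, freq, cut, ticker = \
    'prod', 'economic', 'fred', 'daily', 'LOC', 'USA-GDP'
cache_key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
# cache_key -> 'prod.economic.fred.daily.LOC.USA-GDP'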