Example #1
class IntradayBarRequest(Request):
    def __init__(self, symbol, interval, start=None, end=None, event='TRADE'):
        """
        Intraday bar request for bbg

        Parameters
        ----------
        symbol : str
        interval : int
            bar interval in minutes
        start : datetime, optional
            start date (defaults to 30 days before today)
        end : datetime, optional
            end date (if None, use the current UTC time)
        event : str
            one of TRADE, BID, ASK, BEST_BID, BEST_ASK

        """

        Request.__init__(self)

        self.logger = LoggerManager().getLogger(__name__)

        assert event in ('TRADE', 'BID', 'ASK', 'BEST_BID', 'BEST_ASK')
        assert isinstance(symbol, str)

        if start is None:
            start = datetime.today() - timedelta(30)

        if end is None:
            end = datetime.utcnow()

        self.symbol = symbol
        self.interval = interval
        self.start = to_datetime(start)
        self.end = to_datetime(end)
        self.event = event

        # response related
        self.response = defaultdict(list)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def get_bbg_request(self, svc, session):

        # create the bbg request object
        start, end = self.start, self.end
        request = svc.CreateRequest('IntradayBarRequest')
        request.Set('security', self.symbol)
        request.Set('interval', self.interval)
        request.Set('eventType', self.event)
        request.Set(
            'startDateTime',
            session.CreateDatetime(start.year, start.month, start.day,
                                   start.hour, start.minute))
        request.Set(
            'endDateTime',
            session.CreateDatetime(end.year, end.month, end.day, end.hour,
                                   end.minute))

        self.logger.info("Fetching intraday data for " + str(self.symbol) +
                         " from " + start.strftime('%d/%m/%Y') + " to " +
                         end.strftime('%d/%m/%Y'))

        return request

    def on_event(self, evt, is_final):
        """
        on_event - Invoked in response to COM PumpWaitingMessages, on a different thread

        """

        response = self.response

        self.logger.debug("Receiving data from Bloomberg...")

        for msg in XmlHelper.message_iter(evt):
            bars = msg.GetElement('barData').GetElement('barTickData')

            self.logger.debug("Read message...")

            for i in range(bars.NumValues):
                bar = bars.GetValue(i)
                ts = bar.GetElement(0).Value

                dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)

                response['time'].append(dt)
                response['open'].append(bar.GetElement(1).Value)
                response['high'].append(bar.GetElement(2).Value)
                response['low'].append(bar.GetElement(3).Value)
                response['close'].append(bar.GetElement(4).Value)
                response['volume'].append(bar.GetElement(5).Value)
                response['events'].append(bar.GetElement(6).Value)

                if (i % 20000 == 0):
                    dt_str = dt.strftime('%d/%m/%Y')
                    self.logger.debug("Processing " + dt_str)

        self.logger.debug("Finished processing for ticker.")

        if is_final:
            idx = response.pop('time')
            self.response = DataFrame(
                response,
                columns=['open', 'high', 'low', 'close', 'volume', 'events'],
                index=idx)
            self.response.index.name = 'Date'
            self.response = self.response.astype('float32')
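
A usage sketch for the class above (illustrative only, not from the original source; the COM session plumbing that executes the request lives elsewhere in the framework):

# request 5-minute TRADE bars; start/end default to the last 30 days
req = IntradayBarRequest('EURUSD Curncy', 5, event='TRADE')

# the surrounding framework calls req.get_bbg_request(svc, session), pumps COM
# messages, and feeds events into req.on_event() until is_final is True; at
# that point req.response is a float32 OHLCV DataFrame indexed by 'Date'.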
Example #2
class LoaderBBG(LoaderTemplate):
    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains the various parameters detailing the time series start and finish dates, tickers etc.

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(
            time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events times/dates separately needs ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(
                    time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index(
                    'release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(
                        time_series_request, time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pandas.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request,
                                                 time_series_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[
                0]

            data_frame = self.download_intraday(time_series_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    self.logger.info("No tickers returned for: " +
                                     time_series_request_vendor.tickers)

                    return None

                cols = data_frame.columns.values
                # tz_localize returns a new object, so assign the result back
                data_frame = data_frame.tz_localize('UTC')
                cols = time_series_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(time_series_request_vendor.tickers))
                except Exception:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor,
                           time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " +
                          time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      time_series_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # note: convert_objects was deprecated in later pandas versions;
            # pandas.to_numeric / to_datetime are the modern equivalents
            data_frame = data_frame.convert_objects(convert_dates='coerce',
                                                    convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
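
LoaderBBG is a template: load_ticker() does the orchestration, while the four abstract methods supply the vendor I/O. A minimal subclass sketch (hypothetical, for illustration only):

class MyLoaderBBG(LoaderBBG):
    def kill_session(self):
        pass  # close any open Bloomberg session here

    def download_intraday(self, time_series_request):
        return None  # fetch intraday bars, return a DataFrame

    def download_daily(self, time_series_request):
        return None  # fetch daily history, return a (field, ticker) column DataFrame

    def download_ref(self, time_series_request):
        return None  # fetch reference data, return a DataFrame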
Example #3
class ReferenceDataRequest(Request):
    def __init__(self,
                 symbols,
                 fields,
                 overrides=None,
                 response_type='frame',
                 ignore_security_error=0,
                 ignore_field_error=0):
        """
        response_type: (frame, map) how to return the results
        """
        assert response_type in ('frame', 'map')
        Request.__init__(self,
                         ignore_security_error=ignore_security_error,
                         ignore_field_error=ignore_field_error)
        self.symbols = [symbols] if isinstance(symbols, str) else symbols
        self.fields = [fields] if isinstance(fields, str) else fields
        self.overrides = overrides
        # response related
        self.response = {} if response_type == 'map' else defaultdict(list)
        self.response_type = response_type

        self.logger = LoggerManager().getLogger(__name__)

    def get_bbg_service_name(self):
        return '//blp/refdata'

    def quick_override(self, request, fieldId, val):
        o = request.GetElement('overrides').AppendElment()
        o.SetElement('fieldId', fieldId)
        o.SetElement('value', val)

    def get_bbg_request(self, svc, session):
        # create the bbg request object
        request = svc.CreateRequest('ReferenceDataRequest')
        for sec in self.symbols:
            request.GetElement('securities').AppendValue(sec)
        for fld in self.fields:
            request.GetElement('fields').AppendValue(fld)

        #self.quick_override(request, 'START_DT', '19990101')
        #self.quick_override(request, 'END_DT', '20200101')

        self.quick_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time

        Request.apply_overrides(request, self.overrides)
        return request

    def on_security_node(self, node):
        sid = XmlHelper.get_child_value(node, 'security')
        farr = node.GetElement('fieldData')
        fdata = XmlHelper.get_child_values(farr, self.fields)

        self.logger.debug("Returning reference data...")

        assert len(fdata) == len(self.fields), 'field length must match data length'

        if self.response_type == 'map':
            self.response[sid] = fdata
        else:
            self.response['security'].append(sid)
            for f, d in zip(self.fields, fdata):
                self.response[f].append(d)

        # add any field errors, if present
        ferrors = XmlHelper.get_field_errors(node)
        if ferrors:
            self.field_errors.extend(ferrors)

    def on_event(self, evt, is_final):
        """ this is invoked from in response to COM PumpWaitingMessages - different thread """

        for msg in XmlHelper.message_iter(evt):
            for node, error in XmlHelper.security_iter(
                    msg.GetElement('securityData')):
                if error:
                    self.security_errors.append(error)
                else:
                    self.on_security_node(node)

        if is_final and self.response_type == 'frame':
            index = self.response.pop('security')
            frame = DataFrame(self.response, columns=self.fields, index=index)
            frame.index.name = 'security'
            self.response = frame
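
A usage sketch (illustrative only; as above, the framework's COM event loop drives execution by calling on_event() until is_final is True):

# request two static fields for two securities, returned as a DataFrame
req = ReferenceDataRequest(['IBM US Equity', 'MSFT US Equity'],
                           ['PX_LAST', 'NAME'], response_type='frame')

# once the final event arrives, req.response is a DataFrame indexed by
# 'security' with one column per requested field.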
Example #4
class LoaderBBG(LoaderTemplate):

    def __init__(self):
        super(LoaderBBG, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, time_series_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains the various parameters detailing the time series start and finish dates, tickers etc.

        Returns
        -------
        DataFrame
        """

        time_series_request_vendor = self.construct_vendor_time_series_request(time_series_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (time_series_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # for events times/dates separately needs ReferenceDataRequest (when specified)
            if 'release-date-time-full' in time_series_request.fields:
                # experimental
                datetime_data_frame = self.get_reference_data(time_series_request_vendor, time_series_request)

                # remove fields 'release-date-time-full' from our request (and the associated field in the vendor)
                index = time_series_request.fields.index('release-date-time-full')
                time_series_request_vendor.fields.pop(index)
                time_series_request.fields.pop(index)

                # download all the other event fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(time_series_request_vendor.fields) > 0:
                    events_data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(drop=False)

                    data_frame = pandas.concat([events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(time_series_request, time_series_request_vendor)

        # assume one ticker only
        # for intraday data we use IntradayDataRequest to Bloomberg
        if (time_series_request.freq in ['intraday', 'minute', 'hourly']):
            time_series_request_vendor.tickers = time_series_request_vendor.tickers[0]

            data_frame = self.download_intraday(time_series_request_vendor)

            if data_frame is not None:
                cols = data_frame.columns.values
                # tz_localize returns a new object, so assign the result back
                data_frame = data_frame.tz_localize('UTC')
                cols = time_series_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, time_series_request, time_series_request_vendor):
        data_frame = self.download_daily(time_series_request_vendor)

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, time_series_request_vendor, time_series_request):
        end = datetime.datetime.today()
        end = end.replace(year=end.year + 1)

        time_series_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " + time_series_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(time_series_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to Thalesians tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields, time_series_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, time_series_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # note: convert_objects was deprecated in later pandas versions;
            # pandas.to_numeric / to_datetime are the modern equivalents
            data_frame = data_frame.convert_objects(convert_dates='coerce', convert_numeric='coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_intraday(self, time_series_request):
        return

    @abc.abstractmethod
    def download_daily(self, time_series_request):
        return

    @abc.abstractmethod
    def download_ref(self, time_series_request):
        return
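
The column flattening in get_daily_data() above reduces to a simple zip over the translated names; a standalone sketch of that step (illustrative values):

fields = ['open', 'close']      # translated field names (example values)
tickers = ['EURUSD', 'EURUSD']  # translated tickers, one per column

ticker_combined = [t + "." + f for t, f in zip(tickers, fields)]
# -> ['EURUSD.open', 'EURUSD.close']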
Example #5
class LightTimeSeriesFactory:
    _time_series_cache = {}  # shared across all instances of the class!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source, time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else: data_frame_agg = self.download_daily(time_series_request, loader)

        if 'internet_load' in time_series_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            if loader is not None and kill_session == True: loader.kill_session()

        if time_series_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker = None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if(isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # handle intraday ticker calls separately one by one
        for ticker in time_series_request.tickers:
            time_series_request_single = copy.copy(time_series_request)
            time_series_request_single.tickers = ticker

            if hasattr(time_series_request, 'vendor_tickers'):
                time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                ticker_cycle = ticker_cycle + 1

            # we downscale into float32, to avoid memory problems in Python (32 bit)
            # data is stored on disk as float32 anyway
            data_frame_single = loader.load_ticker(time_series_request_single)

            # if the vendor doesn't provide any data, don't attempt to append
            if data_frame_single is not None:
                if not data_frame_single.empty:
                    data_frame_single.index.name = 'Date'
                    data_frame_single = data_frame_single.astype('float32')

                    # if you call for returning multiple tickers, be careful with memory considerations!
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

            # key = self.create_category_key(time_series_request, ticker)
            # fname = self.create_cache_file_name(key)
            # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file
        data_frame_agg = loader.load_ticker(time_series_request)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'): category = time_series_request.category

        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None): key = category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else: key = category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
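
A usage sketch (illustrative; TimeSeriesRequest construction is part of the surrounding framework and not shown here):

ltsf = LightTimeSeriesFactory()

# 'time_series_request' is assumed to be a populated TimeSeriesRequest with
# data_source, tickers, freq and cache_algo set; with a 'return' cache_algo
# this yields a filtered pandas.DataFrame (or None on failure).
df = ltsf.harvest_time_series(time_series_request)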
Example #6
class LightTimeSeriesFactory:
    _time_series_cache = {}  # shared across all instances of the class!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source, time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else: data_frame_agg = self.download_daily(time_series_request, loader)

        if 'internet_load' in time_series_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            # if loader is not None and kill_session == True: loader.kill_session()

        if time_series_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker = None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if(isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(time_series_request.tickers) == 1 or Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

                # key = self.create_category_key(time_series_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg
        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)

    def fetch_single_time_series(self, time_series_request):
        data_frame_single = self.get_loader(time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'
                data_frame_single = data_frame_single.astype('float32')

        return data_frame_single

    def fetch_group_time_series(self, time_series_request_list):

        data_frame_agg = None

        # the nature of the operation determines whether to use the threading or multiprocessing library
        if Constants().time_series_factory_thread_technique == "thread":
            # most of the time is spent waiting for Bloomberg to return, so threads are usually sufficient
            from multiprocessing.dummy import Pool
        else:
            # must use the multiprocessing_on_dill library, otherwise objects can't be pickled correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        if time_series_request_list[0].data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file

        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(len(time_series_request.tickers) / Constants().time_series_factory_thread_no['other'] - 1)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = time_series_request.tickers[i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'): category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None): key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else: key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
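
The threaded path in fetch_group_time_series() above boils down to mapping a fetch function over a Pool and outer-joining the resulting frames. A minimal standalone sketch of that pattern (assumes per-request fetching is I/O bound, as with Bloomberg):

from multiprocessing.dummy import Pool  # thread-backed Pool


def fetch_all(fetch_one, requests, thread_no=4):
    pool = Pool(thread_no)
    frames = pool.map(fetch_one, requests)  # one DataFrame (or None) per request
    pool.close()
    pool.join()

    agg = None
    for frame in frames:
        if frame is None:
            continue
        # join column-wise on the shared 'Date' index
        agg = frame if agg is None else agg.join(frame, how='outer')

    return agg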
Example #7
class LightTimeSeriesFactory:
    _time_series_cache = {}  # shared across all instances of the class!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

        return

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets Bloomberg API to COM library
        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets Bloomberg API to OpenAPI (recommended)
        """

        self._bbg_default_api = 'open-api'

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of COM API (older) and Open APIs (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source == 'dukascopy':
            from pythalesians.market.loaders.lowlevel.brokers.loaderdukascopy import LoaderDukasCopy
            loader = LoaderDukasCopy()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(
                time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if ('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            # if loader is not None and kill_session == True: loader.kill_session()

        if (time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request,
                                              data_frame_agg)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None
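
    # A minimal usage sketch, assuming a TimeSeriesRequest configured elsewhere;
    # the attribute names follow what this method reads, the values are
    # illustrative only:
    #
    #   request = TimeSeriesRequest()
    #   request.data_source = 'quandl'
    #   request.tickers = ['S&P500']
    #   request.freq = 'daily'
    #   request.cache_algo = 'internet_load_return'
    #
    #   df = factory.harvest_time_series(request)  # pandas.DataFrame or None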

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None
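
    # Sketch of the intended lookup flow (illustrative): an identical request
    # hashes to the same key as the original download, so it can be served
    # from _time_series_cache rather than hitting the data vendor again:
    #
    #   df = factory.get_time_series_cached(request)
    #   if df is None:
    #       df = factory.harvest_time_series(request)   # fall back to download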

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))
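
    # Example of the resulting key (hypothetical field values): a request with
    # environment='backtest', category='fx', data_source='bloomberg',
    # freq='intraday', cut='NYC' and ticker 'EURUSD' would map to
    #
    #   <folder_time_series_data>/backtest.fx.bloomberg.intraday.NYC.EURUSD
    #
    # note that when a list of tickers is passed, only the first enters the key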

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # single threaded version
        # handle intraday ticker calls separately one by one
        if (len(time_series_request.tickers) == 1
                or Constants().time_series_factory_thread_no['other'] == 1):
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(
                    time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        # if you call for returning multiple tickers, be careful with memory considerations!
                        if data_frame_agg is not None:
                            data_frame_agg = data_frame_agg.join(
                                data_frame_single, how='outer')
                        else:
                            data_frame_agg = data_frame_single

                # key = self.create_category_key(time_series_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            return data_frame_agg
        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [
                        time_series_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)
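
    # Fan-out sketch (illustrative): each ticker gets its own shallow copy of
    # the request, so the per-ticker fields are overwritten while shared fields
    # (dates, frequency, etc.) continue to reference the same objects:
    #
    #   import copy
    #   requests = []
    #   for i, ticker in enumerate(time_series_request.tickers):
    #       r = copy.copy(time_series_request)
    #       r.tickers = ticker
    #       r.vendor_tickers = [time_series_request.vendor_tickers[i]]
    #       requests.append(r)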

    def fetch_single_time_series(self, time_series_request):
        data_frame_single = self.get_loader(
            time_series_request.data_source).load_ticker(time_series_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # the float32 downcast will fail for data frames which include dates
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    pass

                if time_series_request.freq == "second":
                    # in newer pandas, resample returns a Resampler object, so an
                    # explicit aggregation is needed to get a DataFrame back
                    data_frame_single = data_frame_single.resample("1s").mean()

        return data_frame_single
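
    # Why the try/except above: frames that carry date or event columns cannot
    # be downcast to float32, e.g. (illustrative)
    #
    #   import pandas
    #   df = pandas.DataFrame({'release-dt': [pandas.Timestamp('2015-01-01')]})
    #   df.astype('float32')   # typically raises TypeError, hence the guard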

    def fetch_group_time_series(self, time_series_request_list):

        data_frame_agg = None

        # the nature of the operation determines whether to use the threading or
        # the multiprocessing library
        if Constants().time_series_factory_thread_technique == "thread":
            # most of the time is spent waiting for the data vendor to return,
            # so threads are usually sufficient rather than full multiprocessing
            from multiprocessing.dummy import Pool
        else:
            # must use the multiprocessing_on_dill library, otherwise objects
            # can't be pickled correctly (note: currently not very stable)
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        data_source = time_series_request_list[0].data_source

        if data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series,
                                time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # collect together all the time series
        if data_frame_group is not None:
            for data_frame_single in data_frame_group:
                # if you call for returning multiple tickers, be careful with memory considerations!
                if data_frame_single is not None:
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single,
                                                             how='outer')
                    else:
                        data_frame_agg = data_frame_single

        return data_frame_agg
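
    # The pool pattern above in isolation (a minimal runnable sketch; the
    # worker and inputs are placeholders for fetch_single_time_series and the
    # request list):
    #
    #   from multiprocessing.dummy import Pool   # thread-backed Pool
    #
    #   def worker(x):
    #       return x * 2
    #
    #   pool = Pool(4)
    #   results = pool.map_async(worker, [1, 2, 3]).get()   # [2, 4, 6], in order
    #   pool.close()
    #   pool.join()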

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file

        if Constants().time_series_factory_thread_no['other'] == 1:
            data_frame_agg = loader.load_ticker(time_series_request)
        else:
            time_series_request_list = []

            group_size = int(
                len(time_series_request.tickers) /
                Constants().time_series_factory_thread_no['other'] - 1)

            if group_size <= 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(time_series_request.tickers), group_size):
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = time_series_request.tickers[
                    i:i + group_size]

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = \
                        time_series_request.vendor_tickers[i:i + group_size]

                time_series_request_list.append(time_series_request_single)

            data_frame_agg = self.fetch_group_time_series(
                time_series_request_list)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[
            fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg
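
    # Chunking sketch (illustrative): with 10 tickers and 4 'other' threads,
    # group_size = int(10 / 4 - 1) = 1, so the loop yields ten single-ticker
    # requests; with 100 tickers it yields groups of 24:
    #
    #   tickers = list(range(10))
    #   group_size = max(int(len(tickers) / 4 - 1), 1)
    #   groups = [tickers[i:i + group_size]
    #             for i in range(0, len(tickers), group_size)]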

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'):
            category = time_series_request.category

        environment = time_series_request.environment
        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None):
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename


class LightTimeSeriesFactory:
    _time_series_cache = {} # shared across all instances of object!

    def __init__(self):
        # needed by harvest_time_series when tickers are not specified explicitly
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()
        self._bbg_default_api = Constants().bbg_default_api
        self._intraday_code = -1

    def set_bloomberg_com_api(self):
        """
        set_bloomberg_com_api - Sets the Bloomberg API to the COM library

        """

        self._bbg_default_api = 'com-api'

    def set_bloomberg_open_api(self):
        """
        set_bloomberg_open_api - Sets the Bloomberg API to the Open API (recommended)

        """

        self._bbg_default_api = 'open-api'
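
    # Usage sketch: the default comes from Constants().bbg_default_api and can
    # be overridden per factory instance:
    #
    #   factory = LightTimeSeriesFactory()
    #   factory.set_bloomberg_open_api()           # use the Open API from now on
    #   loader = factory.get_loader('bloomberg')   # -> LoaderBBGOpen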

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series

        """
        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_loader(self, source):
        """
        get_loader - Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use, e.g. "bloomberg", "quandl", "yahoo", "google", "fred"

        Returns
        -------
        LoaderTemplate
        """

        loader = None

        if source == 'bloomberg':

            ### allow use of the COM API (older) and the Open API (newer) for Bloomberg
            if self._bbg_default_api == 'com-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbg import LoaderBBGCOM
                loader = LoaderBBGCOM()
            elif self._bbg_default_api == 'open-api':
                from pythalesians.market.loaders.lowlevel.bbg.loaderbbgopen import LoaderBBGOpen
                loader = LoaderBBGOpen()

        elif source == 'quandl':
            from pythalesians.market.loaders.lowlevel.quandl.loaderquandl import LoaderQuandl
            loader = LoaderQuandl()

        elif source in ['yahoo', 'google', 'fred']:
            from pythalesians.market.loaders.lowlevel.pandasweb.loaderpandasweb import LoaderPandasWeb
            loader = LoaderPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return loader

    def harvest_time_series(self, time_series_request, kill_session=True):
        """
        harvest_time_series - Loads time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.
        kill_session : bool
            if True, ask the loader to end its session once loading is complete

        Returns
        -------
        pandas.DataFrame
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None: create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if (time_series_request.freq in ['intraday', 'tick']):
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else: data_frame_agg = self.download_daily(time_series_request, loader)

        if ('internet_load' in time_series_request.cache_algo):
            self.logger.debug("Internet loading...")

            # signal to the loader template to exit the session
            if loader is not None and kill_session: loader.kill_session()

        if (time_series_request.cache_algo == 'cache_algo'):
            self.logger.debug("Only caching data in memory, not returning any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                return None

    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Loads time series from cache (if it exists)

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (time_series_request.freq == "intraday"):
            ticker = time_series_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(time_series_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            tsf = TimeSeriesFilter()

            return tsf.filter_time_series(time_series_request, data_frame)

        return None

    def create_time_series_hash_key(self, time_series_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(time_series_request, ticker))

    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        ticker_cycle = 0

        # handle intraday ticker calls separately one by one
        for ticker in time_series_request.tickers:
            time_series_request_single = copy.copy(time_series_request)
            time_series_request_single.tickers = ticker

            if hasattr(time_series_request, 'vendor_tickers'):
                time_series_request_single.vendor_tickers = \
                    [time_series_request.vendor_tickers[ticker_cycle]]
                ticker_cycle = ticker_cycle + 1

            # we downscale into float32, to avoid memory problems in Python (32 bit)
            # data is stored on disk as float32 anyway
            data_frame_single = loader.load_ticker(time_series_request_single)

            # if the vendor doesn't provide any data, don't attempt to append
            if data_frame_single is not None:
                if not data_frame_single.empty:
                    data_frame_single.index.name = 'Date'
                    data_frame_single = data_frame_single.astype('float32')

                    # if you call for returning multiple tickers, be careful with memory considerations!
                    if data_frame_agg is not None:
                        data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                    else:
                        data_frame_agg = data_frame_single

            # key = self.create_category_key(time_series_request, ticker)
            # fname = self.create_cache_file_name(key)
            # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

        return data_frame_agg

    def download_daily(self, time_series_request, loader):
        """
        download_daily - Loads daily time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file
        data_frame_agg = loader.load_ticker(time_series_request)

        key = self.create_category_key(time_series_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, time_series_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated TimeSeriesRequest

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if hasattr(time_series_request, 'category'): category = time_series_request.category

        source = time_series_request.data_source
        freq = time_series_request.freq

        if hasattr(time_series_request, 'cut'): cut = time_series_request.cut

        if (ticker is not None):
            key = category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return Constants().folder_time_series_data + "/" + filename
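
# End-to-end sketch for the lighter factory (illustrative values; assumes a
# working loader for the chosen data source and that TimeSeriesRequest
# accepts attribute assignment as in the earlier sketch):
#
#   ltsf = LightTimeSeriesFactory()
#
#   request = TimeSeriesRequest()
#   request.data_source = 'yahoo'
#   request.tickers = ['Apple']
#   request.vendor_tickers = ['aapl']
#   request.freq = 'daily'
#   request.cache_algo = 'internet_load_return'
#
#   df = ltsf.harvest_time_series(request)   # pandas.DataFrame or None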