Example #1
    def tickers(self, tickers):
        if tickers is not None:
            if not isinstance(tickers, list):
                tickers = [tickers]

        config = None

        new_tickers = []

        if tickers is not None:
            for tick in tickers:
                if '*' in tick:
                    start = ''

                    # anchor the regex at the start if the wildcard is only a
                    # suffix (eg. "EURUSD*" becomes "^(EURUSD)")
                    if tick[-1] == "*" and tick[0] != "*":
                        start = "^"

                    tick = start + "(" + tick.replace('*', '') + ")"

                    if config is None:
                        from findatapy.util import ConfigManager
                        config = ConfigManager().get_instance()

                    new_tickers.append(config.get_filtered_tickers_list_for_category(
                        self.__category, self.__data_source, self.__freq, self.__cut, tick))
                else:
                    new_tickers.append(tick)

            new_tickers = self._flatten_list(new_tickers)

            self.__tickers = new_tickers
        else:
            self.__tickers = tickers
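
A minimal standalone sketch of the wildcard-to-regex rule above (the function name is purely illustrative; in findatapy this logic lives inside the tickers setter and the resulting pattern is handed to ConfigManager):

    def wildcard_to_regex(tick):
        # anchor at the start only when the wildcard is purely a suffix
        start = "^" if tick.endswith("*") and not tick.startswith("*") else ""
        return start + "(" + tick.replace("*", "") + ")"

    assert wildcard_to_regex("EURUSD*") == "^(EURUSD)"
    assert wildcard_to_regex("*USD*") == "(USD)"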
Example #2
    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1

        return
Example #3
    def __init__(self):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()

        if (EventsFactory._econ_data_frame is None):
            self.load_economic_events()
        return
Example #4
    def __init__(self, df=None):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return
Example #5
    def __init__(self, df=None):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return
Example #6
    def refine_expiry_date(self, market_data_request):

        # fill in the expiry date from the config if it hasn't been set
        if market_data_request.expiry_date is None:
            market_data_request.expiry_date = \
                ConfigManager().get_instance().get_expiry_for_ticker(
                    market_data_request.data_source,
                    market_data_request.ticker)

        return market_data_request
Example #7
    def __init__(self):
        super(DataVendor, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)
        self.config = ConfigManager()
Example #8
    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1
        self.days_expired_intraday_contract_download = -1

        return
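
Note the two construction patterns across these snippets: some assign ConfigManager() directly, others go through get_instance(). The latter is the shared-singleton access also used for ticker lookups, as in this hedged sketch (the category, data source, frequency and cut values are placeholders):

    config = ConfigManager().get_instance()

    # look up every ticker defined for a category, as the later examples do
    tickers = config.get_tickers_list_for_category(
        "fx", "bloomberg", "daily", "NYC")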
Example #9
    def __init__(self, data_vendor_dict=None):
        self._config = ConfigManager().get_instance()
        self._filter = Filter()
        self._calculations = Calculations()
        self._io_engine = IOEngine()
        self._intraday_code = -1
        self._days_expired_intraday_contract_download = -1

        # avoid a mutable default argument, which would be shared across
        # every instance
        self._data_vendor_dict = \
            data_vendor_dict if data_vendor_dict is not None else {}

        return
Example #10
    def __init__(self, log_every_day = 1):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1
        self.days_expired_intraday_contract_download = -1
        self.log_every_day = log_every_day

        return
Example #11
    def __init__(self, df = None):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return
Example #12
class DataVendor(abc.ABC):
    """Abstract class for various data source loaders.

    """
    def __init__(self):
        self.config = ConfigManager().get_instance()
        # self.config = None
        return

    @abc.abstractmethod
    def load_ticker(self, market_data_request):
        """Retrieves market data from external data source

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        # to be implemented by subclasses
        return

    @abc.abstractmethod
    def kill_session(self):
        return

    def construct_vendor_market_data_request(self, market_data_request):
        """Creates a MarketDataRequest with the vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        MarketDataRequest
        """

        symbols_vendor = self.translate_to_vendor_ticker(market_data_request)
        fields_vendor = self.translate_to_vendor_field(market_data_request)

        market_data_request_vendor = MarketDataRequest(
            md_request=market_data_request)

        market_data_request_vendor.tickers = symbols_vendor
        market_data_request_vendor.fields = fields_vendor

        return market_data_request_vendor

    def translate_to_vendor_field(self, market_data_request):
        """Converts all the fields from findatapy fields to vendor fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if market_data_request.vendor_fields is not None:
            return market_data_request.vendor_fields

        source = market_data_request.data_source
        fields_list = market_data_request.fields

        if isinstance(fields_list, str):
            fields_list = [fields_list]

        if self.config is None: return fields_list

        fields_converted = []

        for field in fields_list:
            try:
                f = self.config.convert_library_to_vendor_field(source, field)
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.warning(
                    "Couldn't find field conversion, did you type it correctly: "
                    + field)

                return

            fields_converted.append(f)

        return fields_converted

    # Translate findatapy ticker to vendor ticker
    def translate_to_vendor_ticker(self, market_data_request):
        """Converts all the tickers from findatapy tickers to vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if market_data_request.vendor_tickers is not None:
            return market_data_request.vendor_tickers

        category = market_data_request.category
        source = market_data_request.data_source
        freq = market_data_request.freq
        cut = market_data_request.cut
        tickers_list = market_data_request.tickers

        if isinstance(tickers_list, str):
            tickers_list = [tickers_list]

        if self.config is None: return tickers_list

        tickers_list_converted = []

        for ticker in tickers_list:
            try:
                t = self.config.convert_library_to_vendor_ticker(
                    category, source, freq, cut, ticker)
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find ticker conversion, did you type it correctly: "
                    + ticker)

                return

            tickers_list_converted.append(t)

        return tickers_list_converted

    def translate_from_vendor_field(self, vendor_fields_list,
                                    market_data_request):
        """Converts all the fields from vendors fields to findatapy fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        data_source = market_data_request.data_source

        if isinstance(vendor_fields_list, str):
            vendor_fields_list = [vendor_fields_list]

        # if self.config is None: return vendor_fields_list

        fields_converted = []

        # If we haven't set the configuration files for automatic configuration
        if market_data_request.vendor_fields is not None:

            dictionary = dict(
                zip(
                    self.get_lower_case_list(
                        market_data_request.vendor_fields),
                    market_data_request.fields))

            for vendor_field in vendor_fields_list:
                try:
                    fields_converted.append(dictionary[vendor_field.lower()])
                except KeyError:
                    fields_converted.append(vendor_field)

        # Otherwise use stored configuration files (every field needs to be defined!)
        else:
            for vendor_field in vendor_fields_list:
                try:
                    v = self.config.convert_vendor_to_library_field(
                        data_source, vendor_field)
                except Exception:
                    logger = LoggerManager().getLogger(__name__)
                    logger.error(
                        "Couldn't find field conversion, did you type it correctly: "
                        + vendor_field + ", using 'close' as default.")

                    v = 'close'

                fields_converted.append(v)

        return fields_converted

    # Translate vendor ticker to findatapy ticker
    def translate_from_vendor_ticker(self, vendor_tickers_list, md_request):
        """Converts all the tickers from vendor tickers to findatapy tickers

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if md_request.vendor_tickers is not None:

            dictionary = dict(
                zip(self.get_lower_case_list(md_request.vendor_tickers),
                    md_request.tickers))

            tickers_converted = []

            for vendor_ticker in vendor_tickers_list:
                tickers_converted.append(dictionary[vendor_ticker.lower()])

            return tickers_converted

        # tickers_list = md_request.tickers

        if isinstance(vendor_tickers_list, str):
            vendor_tickers_list = [vendor_tickers_list]

        if self.config is None: return vendor_tickers_list

        tickers_converted = []

        for vendor_ticker in vendor_tickers_list:
            try:
                v = self.config.convert_vendor_to_library_ticker(
                    md_request.category, md_request.data_source,
                    md_request.freq, md_request.cut, vendor_ticker)
            except Exception:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find ticker conversion, did you type it correctly: "
                    + vendor_ticker)

                return

            tickers_converted.append(v)

        return tickers_converted

    def get_lower_case_list(self, lst):
        return [k.lower() for k in lst]
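
As a rough sketch of how a concrete vendor plugs into this template (CSVVendor and the file path are illustrative assumptions, not part of findatapy), a subclass only needs load_ticker and kill_session, while the translation helpers above handle the ticker/field mapping:

    import pandas

    class CSVVendor(DataVendor):
        def load_ticker(self, market_data_request):
            # map findatapy tickers/fields to the vendor's own convention
            md_request_vendor = self.construct_vendor_market_data_request(
                market_data_request)

            df = pandas.read_csv("quotes.csv", index_col=0, parse_dates=True)
            df = df[md_request_vendor.tickers]

            # map the vendor columns back to findatapy tickers
            df.columns = self.translate_from_vendor_ticker(
                list(df.columns), market_data_request)

            return df

        def kill_session(self):
            pass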
Example #13
    def fetch_single_time_series(self, market_data_request):

        market_data_request = MarketDataRequest(md_request=market_data_request)

        # only include those tickers which have not expired yet!
        start_date = pandas.Timestamp(market_data_request.start_date).date()

        import datetime
        from datetime import timedelta

        current_date = datetime.datetime.utcnow().date()

        tickers = market_data_request.tickers
        vendor_tickers = market_data_request.vendor_tickers

        expiry_date = market_data_request.expiry_date

        config = ConfigManager().get_instance()

        # in many cases no expiry is defined so skip them
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(
                    market_data_request.data_source, tickers[i])
            except Exception:
                pass

            if expiry_date is not None:
                expiry_date = pandas.Timestamp(expiry_date).date()

                # use pandas Timestamp, a bit more robust with weird dates (can fail if comparing date vs datetime)
                # if the expiry is before the start date of our download don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # special case for futures-contracts which are intraday
                # avoid downloading if the expiry date is very far in the past
                # (we need this because there might be odd situations where we
                # run on an expiry date but still want data right up until
                # expiry time)
                if market_data_request.category == 'futures-contracts' and market_data_request.freq == 'intraday' \
                        and self.days_expired_intraday_contract_download > 0:

                    if expiry_date + timedelta(
                            days=self.days_expired_intraday_contract_download
                    ) < current_date:
                        tickers[i] = None

                if vendor_tickers is not None and tickers[i] is None:
                    vendor_tickers[i] = None

        market_data_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            market_data_request.vendor_tickers = [
                e for e in vendor_tickers if e is not None
            ]

        data_frame_single = None

        if len(market_data_request.tickers) > 0:
            data_frame_single = self.get_data_vendor(
                market_data_request.data_source).load_ticker(
                    market_data_request)
            #print(data_frame_single.head(n=10))

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which include dates/strings
                # (eg. futures contract names)
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    self.logger.warning('Could not convert to float')

                if market_data_request.freq == "second":
                    # modern pandas needs an explicit aggregation here
                    # (older versions defaulted to a mean)
                    data_frame_single = data_frame_single.resample("1s").mean()

        return data_frame_single
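
The expiry filtering above normalises both sides with pandas.Timestamp before comparing, since raw date vs datetime comparisons can fail; a quick sketch of the same idea with made-up dates:

    import pandas

    expiry_date = pandas.Timestamp("2003-02-07").date()
    start_date = pandas.Timestamp("2003-03-01").date()

    # expired before our download window starts: skip the ticker
    if expiry_date < start_date:
        ticker = None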
Example #14
    def fetch_market_data(self, market_data_request, kill_session=True):
        """Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if market_data_request.vendor_tickers is not None and market_data_request.tickers is None:
            market_data_request.tickers = market_data_request.vendor_tickers

        tickers = market_data_request.tickers

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            market_data_request.tickers = ConfigManager().get_instance(
            ).get_tickers_list_for_category(market_data_request.category,
                                            market_data_request.data_source,
                                            market_data_request.freq,
                                            market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if (market_data_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(market_data_request)

        # daily: multiple tickers per cache file - assume we make one API call
        # to the vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request)

        if ('internet_load' in market_data_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if (market_data_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables (also same for downloading futures
            # contracts dates)
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            # pad columns a second time (is this necessary to do here again?)
            # TODO only do this for not daily data?
            try:
                if data_frame_agg is not None:
                    data_frame_agg = self.filter.filter_time_series(
                        market_data_request, data_frame_agg,
                        pad_columns=True).dropna(how='all')

                    # resample data using pandas if specified in the MarketDataRequest
                    if market_data_request.resample is not None:
                        if 'last' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.resample(
                                market_data_request.resample).last()
                        elif 'first' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.resample(
                                market_data_request.resample).first()

                        if 'dropna' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.dropna(how='all')
                else:
                    self.logger.warning("No data returned for " +
                                        str(market_data_request.tickers))

                return data_frame_agg
            except Exception as e:
                self.logger.error(str(e))

                if data_frame_agg is not None:
                    return data_frame_agg

                self.logger.warning("No data returned for " +
                                    str(market_data_request.tickers))

                return None
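
The resample handling above maps directly onto pandas resampling; a minimal sketch of what a resample_how containing 'last' and 'dropna' ends up doing (the frame, rule and column are made up for illustration):

    import pandas

    df = pandas.DataFrame(
        {"EURUSD.close": [1.10, 1.11, 1.12]},
        index=pandas.to_datetime(["2020-01-01 09:00", "2020-01-01 17:00",
                                  "2020-01-02 09:00"]))

    # roughly what resample='D' with 'last' and 'dropna' in resample_how does
    df = df.resample("D").last().dropna(how="all")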
Example #15
class DataVendor(abc.ABC):
    def __init__(self):
        self.config = ConfigManager()
        # self.config = None
        return

    @abc.abstractmethod
    def load_ticker(self, market_data_request):
        """
        load_ticker - Retrieves market data from external data source

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        # to be implemented by subclasses
        return

    @abc.abstractmethod
    def kill_session(self):
        return

    def construct_vendor_market_data_request(self, market_data_request):
        """
        construct_vendor_market_data_request - creates a MarketDataRequest with the vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        MarketDataRequest
        """

        symbols_vendor = self.translate_to_vendor_ticker(market_data_request)
        fields_vendor = self.translate_to_vendor_field(market_data_request)

        market_data_request_vendor = copy.copy(market_data_request)

        market_data_request_vendor.tickers = symbols_vendor
        market_data_request_vendor.fields = fields_vendor

        return market_data_request_vendor

    def translate_to_vendor_field(self, market_data_request):
        """
        translate_to_vendor_field - Converts all the fields from findatapy fields to vendor fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if market_data_request.vendor_fields is not None:
            return market_data_request.vendor_fields

        source = market_data_request.data_source
        fields_list = market_data_request.fields

        if isinstance(fields_list, str):
            fields_list = [fields_list]

        if self.config is None: return fields_list

        fields_converted = []

        for field in fields_list:
            fields_converted.append(
                self.config.convert_library_to_vendor_field(source, field))

        return fields_converted

    # translate findatapy ticker to vendor ticker
    def translate_to_vendor_ticker(self, market_data_request):
        """
        translate_to_vendor_ticker - Converts all the tickers from findatapy tickers to vendor tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if market_data_request.vendor_tickers is not None:
            return market_data_request.vendor_tickers

        category = market_data_request.category
        source = market_data_request.data_source
        freq = market_data_request.freq
        cut = market_data_request.cut
        tickers_list = market_data_request.tickers

        if isinstance(tickers_list, str):
            tickers_list = [tickers_list]

        if self.config is None: return tickers_list

        tickers_list_converted = []

        for ticker in tickers_list:
            tickers_list_converted.append(
                self.config.convert_library_to_vendor_ticker(
                    category, source, freq, cut, ticker))

        return tickers_list_converted

    def translate_from_vendor_field(self, vendor_fields_list,
                                    market_data_request):
        """
        translate_from_vendor_field - Converts all the fields from vendors fields to findatapy fields

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        data_source = market_data_request.data_source

        if isinstance(vendor_fields_list, str):
            vendor_fields_list = [vendor_fields_list]

        # if self.config is None: return vendor_fields_list

        fields_converted = []

        # if we haven't set the configuration files for automatic configuration
        if market_data_request.vendor_fields is not None:

            dictionary = dict(
                zip(market_data_request.vendor_fields,
                    market_data_request.fields))

            for vendor_field in vendor_fields_list:
                try:
                    fields_converted.append(dictionary[vendor_field])
                except KeyError:
                    fields_converted.append(vendor_field)

        # otherwise use stored configuration files (every field needs to be defined!)
        else:
            for vendor_field in vendor_fields_list:
                fields_converted.append(
                    self.config.convert_vendor_to_library_field(
                        data_source, vendor_field))

        return fields_converted

    # translate vendor ticker to findatapy ticker
    def translate_from_vendor_ticker(self, vendor_tickers_list,
                                     market_data_request):
        """
        translate_from_vendor_ticker - Converts all the tickers from vendor tickers to findatapy tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if market_data_request.vendor_tickers is not None:

            dictionary = dict(
                zip(market_data_request.vendor_tickers,
                    market_data_request.tickers))

            tickers_converted = []

            for vendor_ticker in vendor_tickers_list:
                tickers_converted.append(dictionary[vendor_ticker])

            return tickers_converted

        data_source = market_data_request.data_source
        # tickers_list = market_data_request.tickers

        if isinstance(vendor_tickers_list, str):
            vendor_tickers_list = [vendor_tickers_list]

        if self.config is None: return vendor_tickers_list

        tickers_converted = []

        for vendor_ticker in vendor_tickers_list:
            tickers_converted.append(
                self.config.convert_vendor_to_library_ticker(
                    data_source, vendor_ticker))

        return tickers_converted
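
Note one difference from Example #12: this older version duplicates the request with copy.copy rather than re-constructing a MarketDataRequest, and a shallow copy shares any mutable attributes with the original. A generic sketch of that pitfall, with a plain dict standing in for the request object:

    import copy

    request = {"tickers": ["EURUSD"]}  # stand-in for a MarketDataRequest

    shallow = copy.copy(request)
    shallow["tickers"].append("GBPUSD")

    # the shallow copy shares the underlying list with the original
    assert request["tickers"] == ["EURUSD", "GBPUSD"]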
Example #16
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform basic event studies for market data around
    these events. Note, requires a file of input of the following (transposed as columns!) - we give an example for
    NFP released on 7 Feb 2003 (note, that release-date-time-full, need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date	                31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close	                xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release	        143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low	            xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations	xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision	        185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date	20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt	            20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full	08/01/1999 13:30

    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    ### manual offset for certain events where Bloomberg/data vendor displays the wrong date (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt' : 1}

    def __init__(self, df = None):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        self._econ_data_frame = self.speed_cache.get_dataframe(self._db_database_econ_file)

        if self._econ_data_frame is None:
            # self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(self._hdf5_file_econ_file)
            self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(
                self._db_database_econ_file, engine=marketconstants.write_engine,
                db_server=marketconstants.db_server,
                db_port=marketconstants.db_port,
                username=marketconstants.db_username,
                password=marketconstants.db_password)

            self.speed_cache.put_dataframe(self._db_database_econ_file, self._econ_data_frame)

    def harvest_category(self, category_name):
        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event = None, csv = None):
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates = True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")
        data_frame = self.filter.filter_time_series_by_date(
            start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event = None, csv = None):
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event = None):
        ### acceptable fields
        # observation-date <- observation time for the index
        # actual-release
        # survey-median
        # survey-average
        # survey-high
        # survey-low
        # survey-high
        # number-observations
        # release-dt
        # release-date-time-full
        # first-revision
        # first-revision-date

        ticker = []

        # construct tickers of the form USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # create dates for join later
        date_time_dt = [datetime.datetime(
                                date_time_fore[x].year,
                                date_time_fore[x].month,
                                date_time_fore[x].day)
                                for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        self._econ_data_frame[name + ".observation-date"] = self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]

        data_frame.index = self._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]              # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]   # eliminate any NaN dates (artifact of Excel)
        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
                               int((ind_dt[x] - (ind_dt[x] % 10000))/10000),
                               int(((ind_dt[x] % 10000) - (ind_dt[x] % 100))/100),
                               int(ind_dt[x] % 100)) for x in range(len(ind_dt))]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        data_frame = pandas.DataFrame(data=numpy.zeros((0,len(columns))), columns=columns)

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                data_frame = data_frame.append(
                    {'event-name': event, 'release-date-time-full': time},
                    ignore_index=True)

        return data_frame

    def get_all_economic_events(self):
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # sort list alphabetically (and remove any duplicates)
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event = None):
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end, lagged = False,
                                              NYC_cutoff = 10):

        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self).get_economic_event_ret_over_custom_event_day(data_frame_in, event_dates, name, event, start, end,
                                                                                       lagged = lagged, NYC_cutoff = NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised = False):

        return self.get_economic_event_ret_over_custom_event_day(vol_in, name, event, start, end,
            lagged = realised)

        # return super(EventsFactory, self).get_economic_event_ret_over_event_day(vol_in, name, event, start, end, lagged = realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol, mins = 3 * 60,
                                      min_offset = 0, create_index = False, resample = False, freq = 'minutes'):

        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(data_frame_rets, ef_time_frame,
                                                         vol, mins = mins, min_offset = min_offset,
                                                         create_index = create_index, resample = resample, freq = freq)#, start, end)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name, start, end,
                                                       offset_list = [1, 5, 30, 60], add_surprise = False,
                                                       surprise_field = 'survey-average'):

        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
                                                       offset_list = offset_list, add_surprise = add_surprise,
                                                       surprise_field = surprise_field)
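
A hedged usage sketch of EventsFactory, reusing the NFP naming convention from the class docstring (what is actually available depends on the economic-events database you have configured):

    ef = EventsFactory()

    name = "USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA"

    # datetimes of each release
    release_times = ef.get_economic_event_date_time(name)

    # actual prints alongside survey figures, indexed by release date
    fields = ['actual-release', 'survey-median', 'survey-average']
    surprises = ef.get_economic_event_date_time_fields(fields, name)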
Example #17
    def fetch_market_data(self, md_request):
        """Loads time series from specified data provider

        Parameters
        ----------
        md_request : MarketDataRequest
            contains various properties describing the time series to be
            fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """
        logger = LoggerManager().getLogger(__name__)

        # data_vendor = self.get_data_vendor(md_request.data_source)

        # Check if tickers have been specified (if not load all of them for a 
        # category)
        # also handle single tickers/list tickers
        create_tickers = False

        if md_request.vendor_tickers is not None \
                and md_request.tickers is None:
            md_request.tickers = md_request.vendor_tickers

        tickers = md_request.tickers

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == "": create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            md_request.tickers = ConfigManager().get_instance()\
                .get_tickers_list_for_category(
                    md_request.category, md_request.data_source,
                    md_request.freq, md_request.cut)

        # intraday or tick: only one ticker per cache file
        if md_request.freq in ["intraday", "tick", "second", "hour",
                                "minute"]:
            df_agg = self.download_intraday_tick(md_request)

        # Daily: multiple tickers per cache file - assume we make one API call 
        # to vendor library
        else:
            df_agg = self.download_daily(md_request)

        if "internet_load" in md_request.cache_algo:
            logger.debug("Internet loading.. ")

        if md_request.cache_algo == "cache_algo":
            logger.debug(
                "Only caching data in memory, do not return any time series.")
            
            return

        # Only return time series if specified in the algo
        if "return" in md_request.cache_algo:
            # Special case for events/events-dt which is not indexed like other 
            # tables (also same for downloading futures contracts dates)
            if md_request.category is not None:
                if "events" in md_request.category:
                    return df_agg

            # Pad columns a second time (is this necessary to do here again?)
            # TODO only do this for not daily data?
            try:
                if df_agg is not None:
                    df_agg = self._filter.filter_time_series(
                        md_request, df_agg, pad_columns=True)
                    df_agg = df_agg.dropna(how="all")

                    # Resample data using pandas if specified in the 
                    # MarketDataRequest
                    if md_request.resample is not None:
                        if "last" in md_request.resample_how:
                            df_agg = df_agg.resample(
                                md_request.resample).last()
                        elif "first" in md_request.resample_how:
                            df_agg = df_agg.resample(
                                md_request.resample).first()

                        if "dropna" in md_request.resample_how:
                            df_agg = df_agg.dropna(how="all")
                else:
                    logger.warning("No data returned for " + str(
                        md_request.tickers))

                return df_agg
            except Exception as e:

                if df_agg is not None:
                    return df_agg

                logger.warning(
                    "No data returned for "
                    + str(md_request.tickers) + ", " + str(e))

                return None
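
The cache_algo branches above key off substrings ('internet_load', 'cache_algo', 'return'); a sketch of a request configured to both download and return data - the string 'internet_load_return' is inferred from those substring checks, and market_data_generator stands for whichever generator instance you hold:

    md_request = MarketDataRequest(
        tickers=['EURUSD'],
        data_source='bloomberg',
        freq='daily',
        cache_algo='internet_load_return')  # matches 'internet_load' + 'return'

    df = market_data_generator.fetch_market_data(md_request)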
Example #18
    def __init__(self):
        self.config = ConfigManager()
        # self.config = None
        return
Example #19
class MarketDataGenerator(object):
    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1

        return

    def flush_cache(self):
        """
        flush_cache - Flushes the internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_data_vendor(self, source):
        """
        get_data_vendor - Loads the appropriate data vendor class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.
            we can also have forms like "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """

        data_vendor = None

        source = source.split("-")[0]

        if source == 'bloomberg':
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()

        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()

        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()

        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()

        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()

        elif source in [
                'yahoo', 'google', 'fred', 'oecd', 'eurostat', 'edgar-index'
        ]:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """
        fetch_market_data - Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = market_data_request.tickers
        data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            market_data_request.tickers = self.config.get_tickers_list_for_category(
                market_data_request.category, market_data_request.data_source,
                market_data_request.freq, market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if (market_data_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(
                market_data_request, data_vendor)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request,
                                                 data_vendor)

        if ('internet_load' in market_data_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if (market_data_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            try:
                return self.filter.filter_time_series(market_data_request,
                                                      data_frame_agg,
                                                      pad_columns=True)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_market_data_cached(self, market_data_request):
        """
        get_market_data_cached - Loads time series from the in-memory cache (if it exists)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (market_data_request.freq == "intraday"):
            ticker = market_data_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(market_data_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            return self.filter.filter_time_series(market_data_request,
                                                  data_frame)

        return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(market_data_request, ticker))

    def download_intraday_tick(self, market_data_request, data_vendor):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(market_data_request.tickers) == 1 or DataConstants(
        ).market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = data_vendor.load_ticker(
                    market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

                        # # if you call for returning multiple tickers, be careful with memory considerations!
                        # if data_frame_agg is not None:
                        #     data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        # else:
                        #     data_frame_agg = data_frame_single

                # key = self.create_category_key(market_data_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            # if you call for returning multiple tickers, be careful with memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calculations.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg
        else:
            market_data_request_list = []

            # create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):
        data_frame_single = self.get_data_vendor(
            market_data_request.data_source).load_ticker(market_data_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which include dates
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except Exception:
                    pass

                if market_data_request.freq == "second":
                    # modern pandas needs an explicit aggregation here
                    # (older versions defaulted to a mean)
                    data_frame_single = data_frame_single.resample("1s").mean()

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):

        data_frame_agg = None

        # depends on the nature of the operation as to whether we should use
        # the threading or multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            # most of the time is spent waiting for Bloomberg to return, so we
            # can use threads rather than processes
            from multiprocessing.dummy import Pool
        else:
            # must use the multiprocessing_on_dill library, otherwise objects
            # can't be pickled correctly (note: currently not very stable)
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants(
        ).market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            if data_frame_group:
                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_group)

        return data_frame_agg

    def download_daily(self, market_data_request, data_vendor):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file

        if DataConstants().market_thread_no['other'] == 1:
            data_frame_agg = data_vendor.load_ticker(market_data_request)
        else:
            market_data_request_list = []

            group_size = int(
                len(market_data_request.tickers) /
                DataConstants().market_thread_no['other'] - 1)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
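            # e.g. (illustrative) 10 tickers with group_size=4 are split into
            # batches [0:4], [4:8] and [8:10], each fetched on its own thread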
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[
                    i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            data_frame_agg = self.fetch_group_time_series(
                market_data_request_list)

        key = self.create_category_key(market_data_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[
            fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, market_data_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated MarketDataRequest

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None: cut = market_data_request.cut

        if ticker is not None:
            key = environment + "." + category + "." + source + "." + freq + "." + cut + "." + ticker
        else:
            key = environment + "." + category + "." + source + "." + freq + "." + cut

        return key

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
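
The two helpers above build the key used to cache time series in memory and on disk. A minimal sketch of the key format they produce, using hypothetical MarketDataRequest values (all values below are illustrative, not real configuration):

    environment, category, source, freq, cut = (
        "backtest", "fx", "bloomberg", "daily", "NYC")
    ticker = "EURUSD"

    # create_category_key joins the fields with "."
    key = ".".join([environment, category, source, freq, cut, ticker])
    print(key)  # backtest.fx.bloomberg.daily.NYC.EURUSD

    # create_cache_file_name then prefixes the cache folder
    folder_time_series_data = "/tmp/findatapy"  # stand-in for DataConstants()
    print(folder_time_series_data + "/" + key)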
Example No. 20
    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)
        # self.config = None
        return
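
ConfigManager is accessed via its get_instance() accessor, so the relatively expensive parsing of the ticker CSV files only happens once per process. A minimal sketch of that singleton accessor pattern (not findatapy's actual implementation):

    class ConfigManagerSketch(object):
        _instance = None

        @classmethod
        def get_instance(cls):
            # lazily create, then reuse, a single shared instance
            if cls._instance is None:
                cls._instance = cls()
            return cls._instance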
Example No. 21
    def fetch_single_time_series(self, md_request):
        
        md_request = MarketDataRequest(md_request=md_request)

        # Only include those tickers which have not expired yet!
        # (keep start_date as a pd.Timestamp so it compares cleanly with
        # the expiry dates below)
        start_date = pd.Timestamp(md_request.start_date)

        current_date = pd.Timestamp(datetime.datetime.utcnow().date())

        tickers = md_request.tickers
        vendor_tickers = md_request.vendor_tickers

        expiry_date = pd.Timestamp(md_request.expiry_date)

        config = ConfigManager().get_instance()

        # In many cases no expiry is defined, so skip those tickers
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(
                    md_request.data_source, tickers[i])
            except Exception:
                pass

            if expiry_date is not None:
                expiry_date = pd.Timestamp(expiry_date)

                if not pd.isna(expiry_date):
                    # Use pandas Timestamp, a bit more robust with weird dates
                    # (comparing a plain date vs a datetime can fail)
                    # if the expiry is before the start date of our download,
                    # don't bother downloading this ticker
                    if expiry_date < start_date:
                        tickers[i] = None

                    # Special case for futures-contracts which are intraday:
                    # avoid downloading if the expiry date is very far in the
                    # past
                    # (we need this because there might be odd situations where
                    # we run on an expiry date, but still want to get
                    # data right up until expiry time)
                    if md_request.category == "futures-contracts" \
                            and md_request.freq == "intraday" \
                            and self._days_expired_intraday_contract_download > 0:

                        if expiry_date + pd.Timedelta(
                                days=self._days_expired_intraday_contract_download) \
                                < current_date:
                            tickers[i] = None

                    if vendor_tickers is not None and tickers[i] is None:
                        vendor_tickers[i] = None

        md_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            md_request.vendor_tickers = [e for e in vendor_tickers
                                         if e is not None]

        df_single = None

        if len(md_request.tickers) > 0:
            df_single = self.get_data_vendor(
                md_request).load_ticker(md_request)

        if df_single is not None:
            if not df_single.empty:
                df_single.index.name = "Date"

                # A plain astype would fail for DataFrames which include
                # dates/strings, eg. futures contract names
                df_single = Calculations().convert_to_numeric_dataframe(
                    df_single)

                if md_request.freq == "second":
                    # resample needs an aggregation to return a DataFrame
                    df_single = df_single.resample("1s").mean()

        return df_single
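
The expiry filter above can be illustrated in isolation: given tickers and their expiry dates, drop any contract which expired before the requested start date (the tickers and dates below are hypothetical):

    import pandas as pd

    start_date = pd.Timestamp("2020-06-01")
    tickers = ["CLN20", "CLQ20", "CLU20"]
    expiries = [pd.Timestamp("2020-05-19"), pd.Timestamp("2020-06-22"),
                pd.Timestamp("2020-07-21")]

    # keep tickers with no expiry, or whose expiry is on/after the start date
    live = [t for t, e in zip(tickers, expiries)
            if pd.isna(e) or e >= start_date]
    print(live)  # ['CLQ20', 'CLU20']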
Example No. 22
    def __init__(self):
        self.config = ConfigManager().get_instance()
        # self.config = None
        return
Example No. 23
class EventsFactory(EventStudy):
    """Provides methods to fetch data on economic data events and to perform basic event studies for market data around
    these events. Note, requires a file of input of the following (transposed as columns!) - we give an example for
    NFP released on 7 Feb 2003 (note, that release-date-time-full, need not be fully aligned by row).

    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.Date	                31/01/2003 00:00
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.close	                xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release	        143
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-median	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-average	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-low	            xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.survey-high.1	        xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.number-observations	xyz
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision	        185
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.first-revision-date	20030307
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-dt	            20030207
    USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.release-date-time-full	08/01/1999 13:30

    """

    # _econ_data_frame = None

    # where your HDF5 file is stored with economic data
    # TODO integrate with on the fly downloading!
    _hdf5_file_econ_file = MarketConstants().hdf5_file_econ_file
    _db_database_econ_file = MarketConstants().db_database_econ_file

    ### manual offset for certain events where Bloomberg/data vendor displays the wrong date (usually because of time differences)
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt' : 1}

    def __init__(self, df=None):
        # pass the subclass to super() so that the EventStudy initializer runs
        super(EventsFactory, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()
        self.speed_cache = SpeedCache()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return

    def load_economic_events(self):
        self._econ_data_frame = self.speed_cache.get_dataframe(self._db_database_econ_file)

        if self._econ_data_frame is None:
            self._econ_data_frame = self.io_engine.read_time_series_cache_from_disk(
                self._db_database_econ_file, engine=marketconstants.write_engine,
                db_server=marketconstants.db_server,
                db_port=marketconstants.db_port,
                username=marketconstants.db_username,
                password=marketconstants.db_password)

            self.speed_cache.put_dataframe(self._db_database_econ_file, self._econ_data_frame)

    def harvest_category(self, category_name):
        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        data_frame = None

        for k in cat:
            md_request = self.market_data_generator.populate_md_request(k)
            data_frame = self.market_data_generator.fetch_market_data(md_request)

            # TODO allow merge of multiple sources (currently only the last
            # category fetched is returned)

        return data_frame

    def get_economic_events(self):
        return self._econ_data_frame

    def dump_economic_events_csv(self, path):
        self._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event = None, csv = None):
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            data_frame = self._econ_data_frame[ticker]
            data_frame.index = self._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates = True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]
        # start at a really early date
        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")
        data_frame = self.filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event = None, csv = None):
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event = None):
        ### acceptable fields
        # observation-date <- observation time for the index
        # actual-release
        # survey-median
        # survey-average
        # survey-high
        # survey-low
        # survey-high.1 (duplicate survey-high column, see class docstring)
        # number-observations
        # release-dt
        # release-date-time-full
        # first-revision
        # first-revision-date

        ticker = []

        # construct tickers of the form USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # create dates for join later
        date_time_dt = [datetime.datetime(
                                date_time_fore[x].year,
                                date_time_fore[x].month,
                                date_time_fore[x].day)
                                for x in range(len(date_time_fore))]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        self._econ_data_frame[name + ".observation-date"] = self._econ_data_frame.index
        data_frame = self._econ_data_frame[ticker]

        data_frame.index = self._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]              # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]   # eliminate any NaN dates (artifact of Excel)
        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
                               int((ind_dt[x] - (ind_dt[x] % 10000))/10000),
                               int(((ind_dt[x] % 10000) - (ind_dt[x] % 100))/100),
                               int(ind_dt[x] % 100)) for x in range(len(ind_dt))]
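
        # A vectorized alternative (equivalent result, assuming integer-like
        # release-dt values) would be:
        #   data_frame.index = pandas.to_datetime(
        #       ind_dt.astype(int).astype(str), format="%Y%m%d")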

        # HACK! certain events need an offset because BBG has invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        if event is None:
            return name + "." + field
        else:
            return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        rows = []

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                # DataFrame.append returned a new frame (and has been removed in
                # recent pandas), so collect rows and build the frame at the end
                rows.append({'event-name': event, 'release-date-time-full': time})

        return pandas.DataFrame(rows, columns=columns)

    def get_all_economic_events(self):
        field_names = self._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # sort list alphabetically (and remove any duplicates)
        return sorted(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        # the field name must not carry a leading dot, since
        # create_event_desciptor_field adds the "." separator itself
        return self._econ_data_frame[
            self.create_event_desciptor_field(name, event, "release-dt")]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end, lagged = False,
                                              NYC_cutoff = 10):

        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(EventsFactory, self).get_economic_event_ret_over_custom_event_day(data_frame_in, event_dates, name, event, start, end,
                                                                                       lagged = lagged, NYC_cutoff = NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised = False):

        return self.get_economic_event_ret_over_custom_event_day(vol_in, name, event, start, end,
            lagged = realised)

    def get_daily_moves_over_event(self):
        # TODO
        pass

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol, mins = 3 * 60,
                                      min_offset = 0, create_index = False, resample = False, freq = 'minutes'):

        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(data_frame_rets, ef_time_frame,
                                                         vol, mins = mins, min_offset = min_offset,
                                                         create_index = create_index, resample = resample, freq = freq)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name, start, end,
                                                       offset_list = [1, 5, 30, 60], add_surprise = False,
                                                       surprise_field = 'survey-average'):

        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = self.filter.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
                                                       offset_list = offset_list, add_surprise = add_surprise,
                                                       surprise_field = surprise_field)
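
To experiment without a full economic events database, EventsFactory can be fed a DataFrame directly via the df keyword. A toy frame following the column convention from the class docstring (all values are illustrative, not real data, and this assumes findatapy's other dependencies are configured):

    import datetime
    import pandas

    name = "USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA"

    df = pandas.DataFrame(
        index=[datetime.datetime(2003, 1, 31)],
        data={name + ".release-dt": [20030207],
              name + ".release-date-time-full":
                  [datetime.datetime(2003, 2, 7, 13, 30)],
              name + ".actual-release": [143.0],
              name + ".first-revision": [185.0]})

    events = EventsFactory(df=df)
    print(events.get_economic_events())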
Example No. 24
    def fetch_market_data(self, market_data_request, kill_session=True):
        """
        fetch_market_data - Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = market_data_request.tickers
        data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            market_data_request.tickers = ConfigManager().get_instance(
            ).get_tickers_list_for_category(market_data_request.category,
                                            market_data_request.data_source,
                                            market_data_request.freq,
                                            market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if (market_data_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(
                market_data_request, data_vendor)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request,
                                                 data_vendor)

        if ('internet_load' in market_data_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if (market_data_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            try:
                return self.filter.filter_time_series(market_data_request,
                                                      data_frame_agg,
                                                      pad_columns=True)
            except Exception:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    # findatapy has predefined tickers and categories in CSV files in the
    # conf folder, which you can modify; these
    # map from tickers/categories to vendor tickers/vendor fields
    #
    # time_series_categories_fields.csv
    # time_series_fields_list.csv
    # time_series_tickers_list.csv
    #
    # You can also add your own files with tickers, as we have done for FX vol
    # with fx_vol_tickers.csv
    #
    # Having these ticker mappings makes it easier to mix and match data from
    # different data sources, and saves us
    # having to remember vendor-specific tickers

    cm = ConfigManager().get_instance()

    # Get all the categories for raw data (note this won't include generated
    # categories like fx-vol-market,
    # which aggregate from many other categories)
    categories = list(cm.get_categories_from_tickers())

    print(categories)
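    # illustrative output, e.g. ['fx.quandl.daily.NYC', 'fx.bloomberg.daily.NYC', ...]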

    # Filter those categories which include quandl
    quandl_category = [x for x in categories if 'quandl' in x]

    print(quandl_category[0])

    # For this category, get all the tickers and fields which are available
    tickers = cm.get_tickers_list_for_category_str(categories[0])