Code Example #1
File: datavendor.py Project: zmaenpaa/findatapy
    def translate_from_vendor_ticker(self, vendor_tickers_list,
                                     market_data_request):
        """Converts all the fields from vendor tickers to findatapy tickers

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if market_data_request.vendor_tickers is not None:

            dictionary = dict(
                zip(market_data_request.vendor_tickers,
                    market_data_request.tickers))

            tickers_converted = []

            for vendor_ticker in vendor_tickers_list:
                tickers_converted.append(dictionary[vendor_ticker])

            return tickers_converted

        data_source = market_data_request.data_source
        # tickers_list = market_data_request.tickers

        if isinstance(vendor_tickers_list, str):
            vendor_tickers_list = [vendor_tickers_list]

        if self.config is None: return vendor_tickers_list

        tickers_converted = []

        for vendor_ticker in vendor_tickers_list:
            try:
                v = self.config.convert_vendor_to_library_ticker(
                    data_source, vendor_ticker)
            except:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find ticker conversion, did you type it correctly: "
                    + vendor_ticker)

                return

            tickers_converted.append(v)

        return tickers_converted
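
A minimal usage sketch of the mapping path above (the ticker names and the data_vendor instance are hypothetical, purely for illustration; when vendor_tickers is set on the request, the method simply inverts that mapping):

from findatapy.market import MarketDataRequest

# Hypothetical example: the ticker names below are illustrative,
# not real config entries
md_request = MarketDataRequest(
    data_source='quandl',
    tickers=['GDP-US'],              # findatapy ticker (made up)
    vendor_tickers=['FRED/GDP'])     # vendor ticker (made up)

# data_vendor is assumed to be an instance of some DataVendor subclass;
# returns ['GDP-US'] via the vendor_tickers -> tickers dictionary
translated = data_vendor.translate_from_vendor_ticker(['FRED/GDP'], md_request)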
Code Example #2
    def fetch_group_time_series(self, market_data_request_list):

        logger = LoggerManager().getLogger(__name__)

        df_agg = None

        thread_no = constants.market_thread_no["other"]

        if market_data_request_list[
            0].data_source in constants.market_thread_no:
            thread_no = constants.market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = SwimPool().create_pool(
                thread_technique=constants.market_thread_technique,
                thread_no=thread_no)

            # Open the market data downloads in their own threads and return 
            # the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            df_group = result.get()

            pool.close()
            pool.join()
        else:
            df_group = []

            for md_request in market_data_request_list:
                df_group.append(
                    self.fetch_single_time_series(md_request))

        # Collect together all the time series
        if df_group is not None:
            df_group = [i for i in df_group if i is not None]

            if df_group is not None:
                try:
                    df_agg = self._calculations.join(df_group,
                                                     how="outer")

                    # Force ordering to be the same!
                    # df_agg = df_agg[columns]
                except Exception as e:
                    logger.warning(
                        "Possible overlap of columns? Have you specified the "
                        "same ticker several times: " + str(e))

        return df_agg
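
Stripped of the findatapy plumbing, the pattern above is a thread-pool fan-out followed by an outer join. A self-contained sketch of the same shape, assuming only pandas and the standard library (fetch_one is a stand-in for fetch_single_time_series):

from multiprocessing.dummy import Pool  # thread-backed Pool, same API as multiprocessing
import pandas as pd

def fetch_one(ticker):
    # stand-in for fetch_single_time_series: one column per request
    return pd.DataFrame({ticker: [1.0, 2.0]},
                        index=pd.date_range('2021-01-01', periods=2))

requests = ['EURUSD.close', 'GBPUSD.close']

pool = Pool(2)
frames = pool.map_async(fetch_one, requests).get()
pool.close()
pool.join()

# drop failed downloads, then outer-join on the shared DatetimeIndex
frames = [f for f in frames if f is not None]
df_agg = pd.concat(frames, axis=1, join='outer')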
Code Example #3
File: eventstudy.py Project: stonefir/finmarketpy
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(
            DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(
            DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(
            DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator
Code Example #4
    def fetch_group_time_series(self, market_data_request_list):

        logger = LoggerManager().getLogger(__name__)

        data_frame_agg = None

        thread_no = constants.market_thread_no['other']

        if market_data_request_list[0].data_source in constants.market_thread_no:
            thread_no = constants.market_thread_no[market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = SwimPool().create_pool(thread_technique = constants.market_thread_technique, thread_no=thread_no)

            # Open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series, market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(self.fetch_single_time_series(md_request))

        # Collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            # import itertools
            # columns = list(itertools.chain.from_iterable([i.columns for i in data_frame_group if i is not None]))

            # For debugging!
            # import pickle
            # import datetime
            # pickle.dump(data_frame_group, open(str(datetime.datetime.now()).replace(':', '-').replace(' ', '-').replace(".", "-") + ".p", "wb"))

            if data_frame_group is not None:
                try:
                    data_frame_agg = self.calculations.join(data_frame_group, how='outer')

                    # Force ordering to be the same!
                    # data_frame_agg = data_frame_agg[columns]
                except Exception as e:
                    logger.warning('Possible overlap of columns? Have you specified the same ticker several times: ' + str(e))

        return data_frame_agg
Code Example #5
File: datavendor.py Project: vishalbelsare/findatapy
    def translate_to_vendor_ticker(self, md_request):
        """Converts all the tickers from findatapy tickers to vendor tickers

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start 
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if md_request.vendor_tickers is not None:
            return md_request.vendor_tickers

        category = md_request.category
        source = md_request.data_source
        freq = md_request.freq
        cut = md_request.cut
        tickers_list = md_request.tickers

        if isinstance(tickers_list, str):
            tickers_list = [tickers_list]

        if self.config is None: return tickers_list

        tickers_list_converted = []

        for ticker in tickers_list:
            try:
                t = self.config.convert_library_to_vendor_ticker(category,
                                                                 source, freq,
                                                                 cut, ticker)
            except:
                logger = LoggerManager().getLogger(__name__)
                logger.error(
                    "Couldn't find ticker conversion, did you type "
                    "it correctly: " + ticker)

                return

            tickers_list_converted.append(t)

        return tickers_list_converted
Code Example #6
File: market.py Project: quantForma/findatapy
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)

        self.cache = {}

        self.calculations = Calculations()
        self.market_data_generator = market_data_generator

        return
Code Example #7
    def convert_to_numeric_dataframe(self, data_frame):

        logger = LoggerManager().getLogger(__name__)

        failed_conversion_cols = []

        for c in data_frame.columns:
            is_date = False

            # If it's a date column, don't attempt to convert it to a float
            for d in constants.always_date_columns:
                if d in c or 'release-dt' in c:
                    is_date = True
                    break

            if is_date:
                try:
                    data_frame[c] = pd.to_datetime(data_frame[c],
                                                   errors='coerce')
                except:
                    pass
            else:
                try:
                    data_frame[c] = data_frame[c].astype('float32')
                except:
                    if '.' in c:
                        if c.split('.')[1] in constants.always_numeric_column:
                            # astype() has no errors='coerce' option, so use
                            # to_numeric to turn unparseable entries into NaN
                            data_frame[c] = pd.to_numeric(
                                data_frame[c],
                                errors='coerce').astype('float32')
                        else:
                            failed_conversion_cols.append(c)
                    else:
                        failed_conversion_cols.append(c)

                try:
                    data_frame[c] = data_frame[c].fillna(value=np.nan)
                except:
                    pass

        if failed_conversion_cols != []:
            logger.warning('Could not convert to float for ' +
                           str(failed_conversion_cols))

        return data_frame
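
Note that pandas' astype accepts only errors='raise' or errors='ignore', which is why the always-numeric branch above routes through pd.to_numeric. A quick sketch of the coercion behaviour that branch relies on:

import pandas as pd

s = pd.Series(['1.5', 'n/a', '2.25'])

# unparseable entries become NaN instead of raising
print(pd.to_numeric(s, errors='coerce').astype('float32').tolist())
# [1.5, nan, 2.25]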
Code Example #8
File: eventstudy.py Project: walobit/finmarketpy
    def __init__(self):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()

        if (EventsFactory._econ_data_frame is None):
            self.load_economic_events()
        return
Code Example #9
    def __init__(self, market_data_generator = None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator
Code Example #10
File: datavendor.py Project: vishalbelsare/findatapy
    def translate_to_vendor_field(self, md_request):
        """Converts all the fields from findatapy fields to vendor fields

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start 
            and finish, tickers etc

        Returns
        -------
        List of Strings
        """

        if md_request.vendor_fields is not None:
            return md_request.vendor_fields

        source = md_request.data_source
        fields_list = md_request.fields

        if isinstance(fields_list, str):
            fields_list = [fields_list]

        if self.config is None: return fields_list

        fields_converted = []

        for field in fields_list:
            try:
                f = self.config.convert_library_to_vendor_field(source, field)
            except:
                logger = LoggerManager().getLogger(__name__)
                logger.warning(
                    "Couldn't find field conversion, "
                    "did you type it correctly: " + field)

                return

            fields_converted.append(f)

        return fields_converted
Code Example #11
File: market.py Project: quantForma/findatapy
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)

        self.market_data_generator = market_data_generator

        self.calculations = Calculations()
        self.filter = Filter()
        self.timezone = Timezone()

        self.rates = RatesFactory()

        return
Code Example #12
File: eventstudy.py Project: stonefir/finmarketpy
    def __init__(self, df=None):
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.io_engine = IOEngine()

        if df is not None:
            self._econ_data_frame = df
        else:
            self.load_economic_events()

        return
Code Example #13
def load_tickers():
    logger = LoggerManager().getLogger(__name__)

    market = Market(market_data_generator=MarketDataGenerator())

    DataConstants.market_thread_technique = 'thread'

    # load S&P 500 tickers via Wikipedia
    snp = pd.read_html(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    tickers = snp[0]['Symbol'].to_list()

    # download equities data from Yahoo
    md_request = MarketDataRequest(
        start_date=START_DATE,
        data_source='yahoo',  # use Yahoo as the data source
        tickers=tickers,  # ticker (findatapy)
        fields=['close', 'open', 'high', 'low',
                'volume'],  # which fields to download
        vendor_tickers=tickers,  # ticker (Yahoo)
        vendor_fields=['Close', 'Open', 'High', 'Low',
                       'Volume'])  # which Yahoo fields to download

    logger.info("Loading data with threading")

    df = market.fetch_market(md_request)

    logger.info("Loading data with multiprocessing")

    DataConstants.market_thread_technique = 'multiprocessing'

    df = market.fetch_market(md_request)

    logger.info("Loaded data with multiprocessing")

    df.to_csv("temp_downloads/snp.csv")
Code Example #14
File: market.py Project: quantForma/findatapy
class FXCrossFactory(object):
    """Generates FX spot time series and FX total return time series (assuming we already have
    total return indices available from xxxUSD form) from underlying series. Can also produce cross rates from the USD
    crosses.

    """
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        self.cache = {}

        self.calculations = Calculations()
        self.market_data_generator = market_data_generator

        return

    def get_fx_cross_tick(self,
                          start,
                          end,
                          cross,
                          cut="NYC",
                          data_source="dukascopy",
                          cache_algo='internet_load_return',
                          type='spot',
                          environment='backtest',
                          fields=['bid', 'ask']):

        if isinstance(cross, str):
            cross = [cross]

        market_data_request = MarketDataRequest(
            gran_freq="tick",
            freq_mult=1,
            freq='tick',
            cut=cut,
            fields=['bid', 'ask', 'bidv', 'askv'],
            cache_algo=cache_algo,
            environment=environment,
            start_date=start,
            finish_date=end,
            data_source=data_source,
            category='fx')

        market_data_generator = self.market_data_generator
        data_frame_agg = None

        for cr in cross:

            if (type == 'spot'):
                market_data_request.tickers = cr

                cross_vals = market_data_generator.fetch_market_data(
                    market_data_request)

                if cross_vals is not None:

                    # if the user only wants 'close', calculate it as the
                    # bid/ask mid (mean() returns a Series, so convert back
                    # to a DataFrame before renaming the column)
                    if fields == ['close']:
                        cross_vals = cross_vals[[cr + '.bid',
                                                 cr + '.ask']].mean(
                            axis=1).to_frame()
                        cross_vals.columns = [cr + '.close']
                    else:
                        filter = Filter()

                        filter_columns = [cr + '.' + f for f in fields]
                        cross_vals = filter.filter_time_series_by_columns(
                            filter_columns, cross_vals)

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        if data_frame_agg is not None:
            # strip the nan elements
            data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg

    def get_fx_cross(self,
                     start,
                     end,
                     cross,
                     cut="NYC",
                     data_source="bloomberg",
                     freq="intraday",
                     cache_algo='internet_load_return',
                     type='spot',
                     environment='backtest',
                     fields=['close']):

        if data_source == "gain" or data_source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start,
                                          end,
                                          cross,
                                          cut=cut,
                                          data_source=data_source,
                                          cache_algo=cache_algo,
                                          type='spot',
                                          fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []
        freq_list = []
        type_list = []

        for cr in cross:
            market_data_request = MarketDataRequest(freq_mult=1,
                                                    cut=cut,
                                                    fields=['close'],
                                                    freq=freq,
                                                    cache_algo=cache_algo,
                                                    start_date=start,
                                                    finish_date=end,
                                                    data_source=data_source,
                                                    environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday

            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # the nature of the operation determines whether we should use the
        # threading or multiprocessing library
        if constants.market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so
            # threads can often be used rather than multiprocessing; must use
            # the multiprocessing_on_dill library, otherwise objects can't be
            # pickled correctly (note: currently not very stable)
            from multiprocessing_on_dill import Pool

        thread_no = constants.market_thread_no['other']

        if market_data_request_list[
                0].data_source in constants.market_thread_no:
            thread_no = constants.market_thread_no[
                market_data_request_list[0].data_source]

        # fudge, issue with multithreading and accessing HDF5 files
        # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #    thread_no = 0
        thread_no = 0

        if (thread_no > 0):
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            df_list = pool.map_async(self._get_individual_fx_cross,
                                     market_data_request_list).get()

            data_frame_agg = self.calculations.iterative_outer_join(df_list)

            # data_frame_agg = self.calculations.pandas_outer_join(result.get())

            try:
                pool.close()
                pool.join()
            except:
                pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_agg)

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna(how='all')

        # self.speed_cache.put_dataframe(key, data_frame_agg)

        return data_frame_agg

    def _get_individual_fx_cross(self, market_data_request):
        cr = market_data_request.cross
        type = market_data_request.type
        freq = market_data_request.freq

        base = cr[0:3]
        terms = cr[3:6]

        if (type == 'spot'):
            # non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # download base USD cross
                market_data_request.tickers = base_USD
                market_data_request.category = 'fx'

                base_vals = self.market_data_generator.fetch_market_data(
                    market_data_request)

                # download terms USD cross
                market_data_request.tickers = terms_USD
                market_data_request.category = 'fx'

                terms_vals = self.market_data_generator.fetch_market_data(
                    market_data_request)

                # if quoted USD/base, flip to get base/USD
                if (base_USD[0:3] == 'USD'):
                    base_vals = 1 / base_vals

                # if quoted USD/terms, flip to get terms/USD
                if (terms_USD[0:3] == 'USD'):
                    terms_vals = 1 / terms_vals

                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']

                cross_vals = base_vals.div(terms_vals, axis='index')
                cross_vals.columns = [cr + '.close']

                base_vals.columns = [base_USD + '.close']
                terms_vals.columns = [terms_USD + '.close']
            else:
                # if base == 'USD': non_USD = terms
                # if terms == 'USD': non_USD = base

                correct_cr = self.fxconv.correct_notation(cr)

                market_data_request.tickers = correct_cr
                market_data_request.category = 'fx'

                cross_vals = self.market_data_generator.fetch_market_data(
                    market_data_request)

                # special case for USDUSD!
                if base + terms == 'USDUSD':
                    if freq == 'daily':
                        cross_vals = pandas.DataFrame(
                            1,
                            index=cross_vals.index,
                            columns=cross_vals.columns)
                        filter = Filter()
                        cross_vals = filter.filter_time_series_by_holidays(
                            cross_vals, cal='WEEKDAY')
                else:
                    # flip if not convention (eg. JPYUSD)
                    if (correct_cr != cr):
                        cross_vals = 1 / cross_vals

                # cross_vals = self.market_data_generator.harvest_time_series(market_data_request)
                cross_vals.columns = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'daily':
                # download base USD cross
                market_data_request.tickers = base + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    x = 0

                # download terms USD cross
                market_data_request.tickers = terms + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    pass

                # base_rets = self.calculations.calculate_returns(base_vals)
                # terms_rets = self.calculations.calculate_returns(terms_vals)

                # special case for USDUSD (and when the base or terms legs are themselves USDUSD)
                if base + terms == 'USDUSD':
                    base_rets = self.calculations.calculate_returns(base_vals)
                    cross_rets = pandas.DataFrame(0,
                                                  index=base_rets.index,
                                                  columns=base_rets.columns)
                elif base + 'USD' == 'USDUSD':
                    cross_rets = -self.calculations.calculate_returns(
                        terms_vals)
                elif terms + 'USD' == 'USDUSD':
                    cross_rets = self.calculations.calculate_returns(base_vals)
                else:
                    base_rets = self.calculations.calculate_returns(base_vals)
                    terms_rets = self.calculations.calculate_returns(
                        terms_vals)

                    cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # first return of a time series will be NaN, given we don't know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self.calculations.create_mult_index(cross_rets)
                cross_vals.columns = [cr + '-tot.close']

            elif freq == 'intraday':
                self.logger.info(
                    'Total calculated returns for intraday not implemented yet'
                )
                return None

        return cross_vals
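
The spot branch of _get_individual_fx_cross triangulates a non-USD cross from its two USD legs: each leg is flipped into base/USD and terms/USD form, and the cross is their ratio. A standalone sketch of that arithmetic, with made-up illustrative rates:

import pandas as pd

idx = pd.date_range('2021-01-01', periods=2)

# hypothetical USD legs, already flipped into base/USD and terms/USD form
eur_usd = pd.Series([1.10, 1.12], index=idx)            # EURUSD: dollars per euro
jpy_usd = pd.Series([1 / 108.0, 1 / 109.0], index=idx)  # JPYUSD = 1 / USDJPY

# EURJPY = (EUR/USD) / (JPY/USD)
eur_jpy = eur_usd / jpy_usd
print(eur_jpy.round(2).tolist())  # [118.8, 122.08]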
Code Example #15
class MarketDataGenerator(object):
    """Returns market data time series by directly calling market data sources.

    At present it supports Bloomberg (bloomberg), Yahoo (yahoo), Quandl (quandl), FRED (fred) etc., which are implemented
    in subclasses of the DataVendor class. This provides a common wrapper for all these data sources.

    """
    def __init__(self):
        self.config = ConfigManager().get_instance()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1
        self.days_expired_intraday_contract_download = -1

        return

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_data_vendor(self, source):
        """Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.
            we can also have forms like "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """

        data_vendor = None

        try:
            source = source.split("-")[0]
        except:
            self.logger.error("Was data source specified?")

            return None

        if source == 'bloomberg':
            try:
                from findatapy.market.datavendorbbg import DataVendorBBGOpen
                data_vendor = DataVendorBBGOpen()
            except:
                self.logger.warning("Bloomberg needs to be installed")

        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()

        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()

        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()

        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()

        elif source == 'fxcm':
            from findatapy.market.datavendorweb import DataVendorFXCM
            data_vendor = DataVendorFXCM()

        elif source == 'alfred':
            from findatapy.market.datavendorweb import DataVendorALFRED
            data_vendor = DataVendorALFRED()

        elif source == 'yahoo':
            from findatapy.market.datavendorweb import DataVendorYahoo
            data_vendor = DataVendorYahoo()

        elif source in ['google', 'fred', 'oecd', 'eurostat', 'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        elif source == 'bitcoincharts':
            from findatapy.market.datavendorweb import DataVendorBitcoincharts
            data_vendor = DataVendorBitcoincharts()
        elif source == 'poloniex':
            from findatapy.market.datavendorweb import DataVendorPoloniex
            data_vendor = DataVendorPoloniex()
        elif source == 'binance':
            from findatapy.market.datavendorweb import DataVendorBinance
            data_vendor = DataVendorBinance()
        elif source == 'bitfinex':
            from findatapy.market.datavendorweb import DataVendorBitfinex
            data_vendor = DataVendorBitfinex()
        elif source == 'gdax':
            from findatapy.market.datavendorweb import DataVendorGdax
            data_vendor = DataVendorGdax()
        elif source == 'kraken':
            from findatapy.market.datavendorweb import DataVendorKraken
            data_vendor = DataVendorKraken()
        elif source == 'bitmex':
            from findatapy.market.datavendorweb import DataVendorBitmex
            data_vendor = DataVendorBitmex()
        elif '.csv' in source or '.h5' in source:
            from findatapy.market.datavendorweb import DataVendorFlatFile
            data_vendor = DataVendorFlatFile()
        elif source == 'alphavantage':
            from findatapy.market.datavendorweb import DataVendorAlphaVantage
            data_vendor = DataVendorAlphaVantage()
        elif source == 'huobi':
            from findatapy.market.datavendorweb import DataVendorHuobi
            data_vendor = DataVendorHuobi()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if market_data_request.vendor_tickers is not None and market_data_request.tickers is None:
            market_data_request.tickers = market_data_request.vendor_tickers

        tickers = market_data_request.tickers

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            market_data_request.tickers = ConfigManager().get_instance(
            ).get_tickers_list_for_category(market_data_request.category,
                                            market_data_request.data_source,
                                            market_data_request.freq,
                                            market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if (market_data_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(market_data_request)

            # return data_frame_agg

        # daily: multiple tickers per cache file - assume we make one API
        # call to the vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request)

        if ('internet_load' in market_data_request.cache_algo):
            self.logger.debug("Internet loading.. ")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if (market_data_request.cache_algo == 'cache_algo'):
            self.logger.debug(
                "Only caching data in memory, do not return any time series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables (also same for downloading futures
            # contracts dates)
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            # pad columns a second time (is this necessary to do here again?)
            # TODO only do this for not daily data?
            try:
                if data_frame_agg is not None:
                    data_frame_agg = self.filter.filter_time_series(market_data_request, data_frame_agg, pad_columns=True)\
                        .dropna(how = 'all')

                    # resample data using pandas if specified in the MarketDataRequest
                    if market_data_request.resample is not None:
                        if 'last' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.resample(
                                market_data_request.resample).last()
                        elif 'first' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.resample(
                                market_data_request.resample).first()

                        if 'dropna' in market_data_request.resample_how:
                            data_frame_agg = data_frame_agg.dropna(how='all')
                else:
                    self.logger.warning("No data returned for " +
                                        str(market_data_request.tickers))

                return data_frame_agg
            except Exception as e:
                print(str(e))
                if data_frame_agg is not None:
                    return data_frame_agg

                import traceback

                self.logger.warning("No data returned for " +
                                    str(market_data_request.tickers))

                return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            MarketDataRequest().create_category_key(market_data_request,
                                                    ticker))

    def download_intraday_tick(self, market_data_request):
        """Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(market_data_request.tickers) == 1 or DataConstants(
        ).market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                # old_finish_date = market_data_request_single.finish_date
                #
                # market_data_request_single.finish_date = self.refine_expiry_date(market_data_request)
                #
                # if market_data_request_single.finish_date >= market_data_request_single.start_date:
                #     data_frame_single = data_vendor.load_ticker(market_data_request_single)
                # else:
                #     data_frame_single = None
                #
                # market_data_request_single.finish_date = old_finish_date
                #
                # data_frame_single = data_vendor.load_ticker(market_data_request_single)

                data_frame_single = self.fetch_single_time_series(
                    market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if data_frame_single.empty == False:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

                        # # if you call for returning multiple tickers, be careful with memory considerations!
                        # if data_frame_agg is not None:
                        #     data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        # else:
                        #     data_frame_agg = data_frame_single

                # key = self.create_category_key(market_data_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            # if you call for returning multiple tickers, be careful with memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calculations.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg

        else:
            market_data_request_list = []

            # create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):

        market_data_request = MarketDataRequest(md_request=market_data_request)

        # only include those tickers which have not expired yet!
        start_date = pandas.Timestamp(market_data_request.start_date).date()

        import datetime
        from datetime import timedelta

        current_date = datetime.datetime.utcnow().date()

        tickers = market_data_request.tickers
        vendor_tickers = market_data_request.vendor_tickers

        expiry_date = market_data_request.expiry_date

        config = ConfigManager().get_instance()

        # in many cases no expiry is defined so skip them
        for i in range(0, len(tickers)):
            try:
                expiry_date = config.get_expiry_for_ticker(
                    market_data_request.data_source, tickers[i])
            except:
                pass

            if expiry_date is not None:
                expiry_date = pandas.Timestamp(expiry_date).date()

                # use pandas Timestamp, a bit more robust with weird dates (can fail if comparing date vs datetime)
                # if the expiry is before the start date of our download don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # special case for futures-contracts which are intraday
                # avoid downloading if the expiry date is very far in the past
                # (we need this because there might be odd situations where we
                # run on an expiry date, but still want data right up to the
                # expiry time)
                if market_data_request.category == 'futures-contracts' and market_data_request.freq == 'intraday' \
                        and self.days_expired_intraday_contract_download > 0:

                    if expiry_date + timedelta(
                            days=self.days_expired_intraday_contract_download
                    ) < current_date:
                        tickers[i] = None

                if vendor_tickers is not None and tickers[i] is None:
                    vendor_tickers[i] = None

        market_data_request.tickers = [e for e in tickers if e is not None]

        if vendor_tickers is not None:
            market_data_request.vendor_tickers = [
                e for e in vendor_tickers if e is not None
            ]

        data_frame_single = None

        if len(market_data_request.tickers) > 0:
            data_frame_single = self.get_data_vendor(
                market_data_request.data_source).load_ticker(
                    market_data_request)
            #print(data_frame_single.head(n=10))

        if data_frame_single is not None:
            if data_frame_single.empty == False:
                data_frame_single.index.name = 'Date'

                # will fail for DataFrames which include dates/strings (eg. futures contract names)
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except:
                    self.logger.warning('Could not convert to float')

                if market_data_request.freq == "second":
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):

        data_frame_agg = None

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants(
        ).market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = SwimPool().create_pool(
                thread_technique=DataConstants().market_thread_technique,
                thread_no=thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            # for debugging!
            # import pickle
            # import datetime
            # pickle.dump(data_frame_group, open(str(datetime.datetime.now()).replace(':', '-').replace(' ', '-').replace(".", "-") + ".p", "wb"))

            if data_frame_group is not None:
                try:
                    data_frame_agg = self.calculations.pandas_outer_join(
                        data_frame_group)
                except Exception as e:
                    self.logger.warning(
                        'Possible overlap of columns? Have you specified the same ticker several times: '
                        + str(e))

        return data_frame_agg

    def download_daily(self, market_data_request):
        """Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        key = MarketDataRequest().create_category_key(market_data_request)

        is_key_overriden = False

        for k in DataConstants().override_multi_threading_for_categories:
            if k in key:
                is_key_overriden = True
                break

        # by default use other
        thread_no = DataConstants().market_thread_no['other']

        if market_data_request.data_source in DataConstants().market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request.data_source]

        # daily data does not include ticker in the key, as multiple tickers in the same file
        if thread_no == 1:
            # data_frame_agg = data_vendor.load_ticker(market_data_request)
            data_frame_agg = self.fetch_single_time_series(market_data_request)
        else:
            market_data_request_list = []

            # guard against a negative group size, which can occur when the
            # ticker list is shorter than the thread count and would break
            # the slicing loop below
            group_size = max(
                int(len(market_data_request.tickers) / thread_no - 1), 0)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[
                    i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            # special case where we make smaller calls one after the other
            if is_key_overriden:

                data_frame_list = []

                for md in market_data_request_list:
                    data_frame_list.append(self.fetch_single_time_series(md))

                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_list)
            else:
                data_frame_agg = self.fetch_group_time_series(
                    market_data_request_list)

        # fname = self.create_cache_file_name(key)
        # self._time_series_cache[fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def refine_expiry_date(self, market_data_request):

        # if no expiry date is specified, look it up in the config
        if market_data_request.expiry_date is None:
            market_data_request.expiry_date = \
                ConfigManager().get_instance().get_expiry_for_ticker(
                    market_data_request.data_source,
                    market_data_request.ticker)

        return market_data_request

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
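
A minimal end-to-end usage sketch for MarketDataGenerator (the ticker/vendor-ticker pair below is hypothetical; what is actually downloadable depends on the configured vendor and your entitlements):

from findatapy.market import MarketDataRequest, MarketDataGenerator

mdg = MarketDataGenerator()

md_request = MarketDataRequest(
    start_date='01 Jan 2020',
    finish_date='31 Mar 2020',
    freq='daily',
    data_source='quandl',
    tickers=['EURUSD'],                 # findatapy ticker (hypothetical)
    vendor_tickers=['ECB/EURUSD'],      # vendor ticker (hypothetical)
    fields=['close'],
    cache_algo='internet_load_return')  # download and return the series

df = mdg.fetch_market_data(md_request)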
Code Example #16
class HistEconDataFactory(object):

    def __init__(self, market_data_generator = None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator

    def get_economic_data_history(self, start_date, finish_date, country_group, data_type,
        source = 'fred', cache_algo = "internet_load_return"):

        #vendor_country_codes = self.fred_country_codes[country_group]
        #vendor_pretty_country = self.fred_nice_country_codes[country_group]

        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group]['Country'])

        # construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(self._all_econ_tickers[
                                         self._all_econ_tickers["Full Code"] == pretty_ticker][source].values)

            if vendor_ticker == []:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg': vendor_fields = ['PX_LAST']

        md_request = MarketDataRequest(
                start_date = start_date,                            # start date
                finish_date = finish_date,                          # finish date
                category = 'economic',
                freq = 'daily',                                     # daily data
                data_source = source,                               # data source (eg. fred)
                cut = 'LOC',
                tickers = pretty_tickers,
                fields = ['close'],                                 # which fields to download
                vendor_tickers = vendor_tickers,
                vendor_fields = vendor_fields,                      # which vendor fields to download
                cache_algo = cache_algo)                            # how to return data

        return self.market_data_generator.fetch_market_data(md_request)

    def grasp_coded_entry(self, df, index):
        df = df.loc[index:].stack()
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']

        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum(
            [list(self._econ_country_codes[self._econ_country_codes["Country"] == x]['Code']) for x in countries],
            [])

        return df
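
A hedged usage sketch for get_economic_data_history; the country group and data type below are illustrative placeholders and must match rows in the CSV mapping files loaded in __init__:

hist = HistEconDataFactory()

df = hist.get_economic_data_history(
    start_date='01 Jan 2010',
    finish_date='01 Jan 2020',
    country_group='g10',     # hypothetical entry in econ_country_groups
    data_type='CPI',         # hypothetical 'Full Code' suffix, eg. 'USA-CPI'
    source='fred')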
Code Example #17
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self._pnl = None
        self._portfolio = None
        return
Code Example #18
File: datavendorweb.py Project: alamtahsin/findatapy
    def __init__(self):
        super(DataVendorALFRED, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)
Code Example #19
File: datavendorweb.py Project: alamtahsin/findatapy
class DataVendorDukasCopy(DataVendor):
    tick_name = "{symbol}/{year}/{month}/{day}/{hour}h_ticks.bi5"

    def __init__(self):
        super(DataVendorDukasCopy, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        import logging
        logging.getLogger("requests").setLevel(logging.WARNING)
        self.config = ConfigManager()

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """
        load_ticker - Retrieves market data from external data source (in this case Dukascopy)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """

        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        data_frame = None
        self.logger.info("Request Dukascopy data")

        # doesn't support non-tick data
        if (market_data_request.freq in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly', 'intraday', 'minute', 'hourly']):
            self.logger.warning("Dukascopy loader is for tick data only")

            return None

        # assume one ticker only (MarketDataGenerator only calls one ticker at a time)
        if (market_data_request.freq in ['tick']):
            # market_data_request_vendor.tickers = market_data_request_vendor.tickers[0]

            data_frame = self.get_tick(market_data_request, market_data_request_vendor)

            if data_frame is not None: data_frame = data_frame.tz_localize('UTC')

        self.logger.info("Completed request from Dukascopy")

        return data_frame

    def kill_session(self):
        return

    def get_tick(self, market_data_request, market_data_request_vendor):

        data_frame = self.download_tick(market_data_request_vendor)

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_fields = data_frame.columns
            returned_tickers = [market_data_request_vendor.tickers[0]] * (len(returned_fields))

            fields = self.translate_from_vendor_field(returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def download_tick(self, market_data_request):

        symbol = market_data_request.tickers[0]
        df_list = []

        self.logger.info("About to download from Dukascopy... for " + symbol)

        # single threaded
        df_list = [self.fetch_file(time, symbol) for time in
                  self.hour_range(market_data_request.start_date, market_data_request.finish_date)]

        # TODO parallel (has pickle issues)
        # time_list = self.hour_range(market_data_request.start_date, market_data_request.finish_date)
        # import multiprocessing_on_dill as multiprocessing
        #
        # pool = multiprocessing.Pool(processes=4)
        # results = [pool.apply_async(self.fetch_file, args=(time, symbol)) for time in time_list]
        # df_list = [p.get() for p in results]

        try:
            return pandas.concat(df_list)
        except:
            return None

    def fetch_file(self, time, symbol):
        if time.hour == 0: self.logger.info("Downloading... " + str(time))

        tick_path = self.tick_name.format(
                symbol = symbol,
                year = str(time.year).rjust(4, '0'),
                month = str(time.month).rjust(2, '0'),
                day = str(time.day).rjust(2, '0'),
                hour = str(time.hour).rjust(2, '0')
            )

        tick = self.fetch_tick(DataConstants().dukascopy_base_url + tick_path)

        if DataConstants().dukascopy_write_temp_tick_disk:
            out_path = DataConstants().temp_folder + "/dkticks/" + tick_path

            if not os.path.exists(out_path):
                if not os.path.exists(os.path.dirname(out_path)):
                    os.makedirs(os.path.dirname(out_path))

            self.write_tick(tick, out_path)

        try:
            return self.retrieve_df(lzma.decompress(tick), symbol, time)
        except:
            return None

    def fetch_tick(self, tick_url):
        i = 0
        tick_request = None

        # try up to 5 times to download
        while i < 5:
            try:
                tick_request = requests.get(tick_url)
                break
            except requests.exceptions.RequestException:
                i = i + 1

        if tick_request is None:
            self.logger.error("Failed to download from " + tick_url)
            return None

        return tick_request.content

    def write_tick(self, content, out_path):
        data_file = open(out_path, "wb+")
        data_file.write(content)
        data_file.close()

    def chunks(self, lst, n):
        if n < 1:
            n = 1

        return [lst[i:i + n] for i in range(0, len(lst), n)]

    def retrieve_df(self, data, symbol, epoch):
        date, rows = self.parse_tick_data(data, epoch)

        df = pandas.DataFrame(data=rows, columns=['temp', 'ask', 'bid', 'askv', 'bidv'], index=date)

        # drop returns a new object, so the result must be assigned
        df = df.drop('temp', axis=1)
        df.index.name = 'Date'

        divisor = 100000

        # where JPY is the terms currency we have a different divisor
        if symbol[3:6] == 'JPY':
            divisor = 1000

        # prices are returned without a decimal point
        df['bid'] = df['bid'] / divisor
        df['ask'] = df['ask'] / divisor

        return df

    def hour_range(self, start_date, end_date):
        delta_t = end_date - start_date

        delta_hours = (delta_t.days * 24.0) + (delta_t.seconds / 3600.0)

        for n in range(int(delta_hours)):
            yield start_date + timedelta(hours=n)

    def parse_tick_data(self, data, epoch):
        import struct

        # tick = namedtuple('Tick', 'Date ask bid askv bidv')

        chunks_list = self.chunks(data, 20)
        parsed_list = []
        date = []

        # note: Numba can speed up for loops
        for row in chunks_list:
            d = struct.unpack(">LLLff", row)

            # d[0] is the millisecond offset from the start of the hour file
            date.append(epoch + timedelta(milliseconds=d[0]))

            # SLOW: no point using named tuples!
            # row_data = tick._asdict(tick._make(d))
            # row_data['Date'] = (epoch + timedelta(0,0,0,row_data['Date']))

            parsed_list.append(d)

        return date, parsed_list

    def get_daily_data(self):
        pass
Code example #20
0
File: datavendorweb.py Project: alamtahsin/findatapy
class DataVendorQuandl(DataVendor):

    def __init__(self):
        super(DataVendorQuandl, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request Quandl data")

        data_frame = self.download_daily(market_data_request_vendor)

        if data_frame is None or data_frame.empty: return None

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-').replace('.', '-').replace('--', '-') for x in returned_tickers]

            returned_fields = [x.replace('value', 'close') for x in returned_fields]    # special case for close

            # replace time fields (can cause problems later for times to start with 0)
            for i in range(0, 10):
                returned_fields = [x.replace('0'+ str(i) + ':00', str(i) + ':00') for x in returned_fields]

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            try:
                fields = self.translate_from_vendor_field(returned_fields, market_data_request)
                tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)
            except Exception as e:
                self.logger.error("Could not translate tickers/fields: " + str(e))
                return None

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from Quandl for " + str(ticker_combined))

        return data_frame

    def download_daily(self, market_data_request):
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                data_frame = Quandl.get(market_data_request.tickers, authtoken=DataConstants().quandl_api_key, trim_start=market_data_request.start_date,
                                        trim_end=market_data_request.finish_date)

                break
            except Exception:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) + " to download from Quandl")

        if trials == 5:
            self.logger.error("Couldn't download from Quandl after several attempts!")

        return data_frame
Code example #21
0
class MarketDataGenerator(object):
    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.filter = Filter()
        self.calculations = Calculations()
        self.io_engine = IOEngine()
        self._intraday_code = -1

        return

    def flush_cache(self):
        """
        flush_cache - Flushes internal cache of time series
        """

        self._time_series_cache = {}

    def set_intraday_code(self, code):
        self._intraday_code = code

    def get_data_vendor(self, source):
        """
        get_data_vendor - Loads appropriate data vendor class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.
            we can also have forms like "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """

        data_vendor = None

        source = source.split("-")[0]

        if source == 'bloomberg':
            from findatapy.market.datavendorbbg import DataVendorBBGOpen
            data_vendor = DataVendorBBGOpen()

        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()

        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()

        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()

        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()

        elif source in [
                'yahoo', 'google', 'fred', 'oecd', 'eurostat', 'edgar-index'
        ]:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        # TODO add support for other data sources (like Reuters)

        return data_vendor

    def fetch_market_data(self, market_data_request, kill_session=True):
        """
        fetch_market_data - Loads time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        tickers = market_data_request.tickers
        data_vendor = self.get_data_vendor(market_data_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = False

        if tickers is None:
            create_tickers = True
        elif isinstance(tickers, str):
            if tickers == '': create_tickers = True
        elif isinstance(tickers, list):
            if tickers == []: create_tickers = True

        if create_tickers:
            market_data_request.tickers = self.config.get_tickers_list_for_category(
                market_data_request.category, market_data_request.data_source,
                market_data_request.freq, market_data_request.cut)

        # intraday or tick: only one ticker per cache file
        if (market_data_request.freq
                in ['intraday', 'tick', 'second', 'hour', 'minute']):
            data_frame_agg = self.download_intraday_tick(
                market_data_request, data_vendor)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(market_data_request,
                                                 data_vendor)

        if 'internet_load' in market_data_request.cache_algo:
            self.logger.debug("Internet loading...")

            # signal to data_vendor template to exit session
            # if data_vendor is not None and kill_session == True: data_vendor.kill_session()

        if market_data_request.cache_algo == 'cache_algo':
            self.logger.debug(
                "Only caching data in memory, not returning any time series.")
            return

        # only return time series if specified in the algo
        if 'return' in market_data_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if market_data_request.category is not None:
                if 'events' in market_data_request.category:
                    return data_frame_agg

            try:
                return self.filter.filter_time_series(market_data_request,
                                                      data_frame_agg,
                                                      pad_columns=True)
            except:
                import traceback

                self.logger.error(traceback.format_exc())

                return None

    def get_market_data_cached(self, market_data_request):
        """
        get_market_data_cached - Loads time series from the in-memory cache (if it exists)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        if (market_data_request.freq == "intraday"):
            ticker = market_data_request.tickers
        else:
            ticker = None

        fname = self.create_time_series_hash_key(market_data_request, ticker)

        if (fname in self._time_series_cache):
            data_frame = self._time_series_cache[fname]

            return self.filter.filter_time_series(market_data_request,
                                                  data_frame)

        return None

    def create_time_series_hash_key(self, market_data_request, ticker=None):
        """
        create_time_series_hash_key - Creates a hash key for retrieving the time series

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        if (isinstance(ticker, list)):
            ticker = ticker[0]

        return self.create_cache_file_name(
            self.create_category_key(market_data_request, ticker))

    def download_intraday_tick(self, market_data_request, data_vendor):
        """
        download_intraday_tick - Loads intraday time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None
        calculations = Calculations()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(market_data_request.tickers) == 1 or DataConstants(
        ).market_thread_no['other'] == 1:
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                # we downscale into float32, to avoid memory problems in Python (32 bit)
                # data is stored on disk as float32 anyway
                data_frame_single = data_vendor.load_ticker(
                    market_data_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

                        # # if you call for returning multiple tickers, be careful with memory considerations!
                        # if data_frame_agg is not None:
                        #     data_frame_agg = data_frame_agg.join(data_frame_single, how='outer')
                        # else:
                        #     data_frame_agg = data_frame_single

                # key = self.create_category_key(market_data_request, ticker)
                # fname = self.create_cache_file_name(key)
                # self._time_series_cache[fname] = data_frame_agg  # cache in memory (disable for intraday)

            # if you call for returning multiple tickers, be careful with memory considerations!
            if data_frame_group is not None:
                data_frame_agg = calculations.pandas_outer_join(
                    data_frame_group)

            return data_frame_agg
        else:
            market_data_request_list = []

            # create a list of MarketDataRequests
            for ticker in market_data_request.tickers:
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = ticker

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = [
                        market_data_request.vendor_tickers[ticker_cycle]
                    ]
                    ticker_cycle = ticker_cycle + 1

                market_data_request_list.append(market_data_request_single)

            return self.fetch_group_time_series(market_data_request_list)

    def fetch_single_time_series(self, market_data_request):
        data_frame_single = self.get_data_vendor(
            market_data_request.data_source).load_ticker(market_data_request)

        if data_frame_single is not None:
            if not data_frame_single.empty:
                data_frame_single.index.name = 'Date'

                # will fail for dataframes which includes dates
                try:
                    data_frame_single = data_frame_single.astype('float32')
                except:
                    pass

                if market_data_request.freq == "second":
                    data_frame_single = data_frame_single.resample("1s")

        return data_frame_single

    def fetch_group_time_series(self, market_data_request_list):

        data_frame_agg = None

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so can use threads rather than multiprocessing
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants(
        ).market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        if thread_no > 0:
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self.fetch_single_time_series,
                                    market_data_request_list)
            data_frame_group = result.get()

            pool.close()
            pool.join()
        else:
            data_frame_group = []

            for md_request in market_data_request_list:
                data_frame_group.append(
                    self.fetch_single_time_series(md_request))

        # collect together all the time series
        if data_frame_group is not None:
            data_frame_group = [i for i in data_frame_group if i is not None]

            if data_frame_group is not None:
                data_frame_agg = self.calculations.pandas_outer_join(
                    data_frame_group)

        return data_frame_agg

    def download_daily(self, market_data_request, data_vendor):
        """
        download_daily - Loads daily time series from specified data provider

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        pandas.DataFrame
        """

        # daily data does not include ticker in the key, as multiple tickers in the same file

        if DataConstants().market_thread_no['other'] == 1:
            data_frame_agg = data_vendor.load_ticker(market_data_request)
        else:
            market_data_request_list = []

            group_size = int(
                len(market_data_request.tickers) /
                DataConstants().market_thread_no['other'] - 1)

            if group_size == 0: group_size = 1

            # split up tickers into groups related to number of threads to call
            for i in range(0, len(market_data_request.tickers), group_size):
                market_data_request_single = copy.copy(market_data_request)
                market_data_request_single.tickers = market_data_request.tickers[
                    i:i + group_size]

                if market_data_request.vendor_tickers is not None:
                    market_data_request_single.vendor_tickers = \
                        market_data_request.vendor_tickers[i:i + group_size]

                market_data_request_list.append(market_data_request_single)

            data_frame_agg = self.fetch_group_time_series(
                market_data_request_list)

        key = self.create_category_key(market_data_request)
        fname = self.create_cache_file_name(key)
        self._time_series_cache[
            fname] = data_frame_agg  # cache in memory (ok for daily data)

        return data_frame_agg

    def create_category_key(self, market_data_request, ticker=None):
        """
        create_category_key - Returns a category key for the associated MarketDataRequest

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None: cut = market_data_request.cut

        if (ticker is not None):
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
        else:
            key = environment + "." + category + '.' + source + '.' + freq + '.' + cut

        return key

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
Code example #22
0
    def price_instrument(self,
                         cross,
                         horizon_date,
                         strike,
                         expiry_date=None,
                         vol=None,
                         notional=1000000,
                         contract_type='european-call',
                         tenor=None,
                         fx_vol_surface=None,
                         premium_output=None,
                         delta_output=None,
                         depo_tenor=None,
                         return_as_df=True):
        """Prices FX options for horizon dates/expiry dates given by the user from FX spot rates, FX volatility surface
        and deposit rates.

        Parameters
        ----------
        cross : str
            Currency pair

        horizon_date : DateTimeIndex
            Horizon dates for options

        expiry_date : DateTimeIndex
            Expiry dates for options

        fx_vol_surface : FXVolSurface
            Contains FX spot, FX vol surface quotes, FX forwards and base depos

        Returns
        -------
        DataFrame
        """

        # if market_df is None: market_df = self._market_df
        if fx_vol_surface is None: fx_vol_surface = self._fx_vol_surface
        if premium_output is None: premium_output = self._premium_output
        if delta_output is None: delta_output = self._delta_output

        logger = LoggerManager().getLogger(__name__)

        field = fx_vol_surface._field

        # Make horizon date and expiry date pandas DatetimeIndex
        if isinstance(horizon_date, pd.Timestamp):
            horizon_date = pd.DatetimeIndex([horizon_date])
        else:
            horizon_date = pd.DatetimeIndex(horizon_date)

        if expiry_date is not None:
            if isinstance(expiry_date, pd.Timestamp):
                expiry_date = pd.DatetimeIndex([expiry_date])
            else:
                expiry_date = pd.DatetimeIndex(expiry_date)
        else:
            expiry_date = self._calendar.get_expiry_date_from_horizon_date(
                horizon_date, tenor, cal=cross)

        # If the strike hasn't been supplied need to work this out
        if not (isinstance(strike, np.ndarray)):
            old_strike = strike

            if isinstance(strike, str):
                strike = np.empty(len(horizon_date), dtype=object)
            else:
                strike = np.empty(len(horizon_date))

            strike.fill(old_strike)

        # If the vol hasn't been supplied need to work this out
        if not (isinstance(vol, np.ndarray)):

            if vol is None:
                vol = np.nan

            old_vol = vol

            vol = np.empty(len(horizon_date))
            vol.fill(old_vol)

        option_values = np.zeros(len(horizon_date))
        spot = np.zeros(len(horizon_date))
        delta = np.zeros(len(horizon_date))
        intrinsic_values = np.zeros(len(horizon_date))

        def _price_option(contract_type_, contract_type_fin_):
            for i in range(len(expiry_date)):
                built_vol_surface = False

                # If we have a "key strike" need to fit the vol surface
                if isinstance(strike[i], str):
                    if not (built_vol_surface):

                        fx_vol_surface.build_vol_surface(horizon_date[i])
                        fx_vol_surface.extract_vol_surface(
                            num_strike_intervals=None)

                        built_vol_surface = True

                    # Delta neutral strike/or whatever strike is quoted as ATM
                    # usually this is ATM delta neutral strike, but can sometimes be ATMF for some Latam
                    # Take the vol directly quoted, rather than getting it from building vol surface
                    if strike[i] == 'atm':
                        strike[i] = fx_vol_surface.get_atm_strike(tenor)
                        vol[i] = fx_vol_surface.get_atm_quoted_vol(
                            tenor) / 100.0
                        # vol[i] = fx_vol_surface.get_atm_vol(tenor) / 100.0 # interpolated
                    elif strike[i] == 'atms':
                        strike[i] = fx_vol_surface.get_spot(
                        )  # Interpolate vol later
                    elif strike[i] == 'atmf':
                        # Quoted tenor, no need to interpolate
                        # spot close plus forward points scaled by the divisor
                        strike[i] = float(fx_vol_surface.get_all_market_data()[cross + ".close"][horizon_date[i]]) \
                            + (float(fx_vol_surface.get_all_market_data()[cross + tenor + ".close"][horizon_date[i]])
                               / self._fx_forwards_pricer.get_forwards_divisor(cross[3:6]))

                        # Interpolate vol later
                    elif strike[i] == '25d-otm':
                        if 'call' in contract_type_:
                            strike[i] = fx_vol_surface.get_25d_call_strike(
                                tenor)
                            vol[i] = fx_vol_surface.get_25d_call_vol(
                                tenor) / 100.0
                        elif 'put' in contract_type_:
                            strike[i] = fx_vol_surface.get_25d_put_strike(
                                tenor)
                            vol[i] = fx_vol_surface.get_25d_put_vol(
                                tenor) / 100.0
                    elif strike[i] == '10d-otm':
                        if 'call' in contract_type_:
                            strike[i] = fx_vol_surface.get_10d_call_strike(
                                tenor)
                            vol[i] = fx_vol_surface.get_10d_call_vol(
                                tenor) / 100.0
                        elif 'put' in contract_type_:
                            strike[i] = fx_vol_surface.get_10d_put_strike(
                                tenor)
                            vol[i] = fx_vol_surface.get_10d_put_vol(
                                tenor) / 100.0

                if not (built_vol_surface):
                    try:
                        fx_vol_surface.build_vol_surface(horizon_date[i])
                    except:
                        logger.warn("Failed to build vol surface for " +
                                    str(horizon_date) +
                                    ", won't be able to interpolate vol")
                    # fx_vol_surface.extract_vol_surface(num_strike_intervals=None)

                # If an implied vol hasn't been provided, interpolate one from
                # the vol surface (which has been fitted above if necessary)
                if np.isnan(vol[i]):

                    if tenor is None:
                        vol[i] = fx_vol_surface.calculate_vol_for_strike_expiry(
                            strike[i], expiry_date=expiry_date[i], tenor=None)
                    else:
                        vol[i] = fx_vol_surface.calculate_vol_for_strike_expiry(
                            strike[i], expiry_date=None, tenor=tenor)

                model = FinModelBlackScholes(float(vol[i]))

                logger.info("Pricing " + contract_type_ +
                            " option, horizon date = " + str(horizon_date[i]) +
                            ", expiry date = " + str(expiry_date[i]))

                option = FinFXVanillaOption(self._findate(expiry_date[i]),
                                            strike[i], cross,
                                            contract_type_fin_, notional,
                                            cross[0:3])

                spot[i] = fx_vol_surface.get_spot()
                """ FinancePy will return the value in the following dictionary for values
                    {'v': vdf,
                    "cash_dom": cash_dom,
                    "cash_for": cash_for,
                    "pips_dom": pips_dom,
                    "pips_for": pips_for,
                    "pct_dom": pct_dom,
                    "pct_for": pct_for,
                    "not_dom": notional_dom,
                    "not_for": notional_for,
                    "ccy_dom": self._domName,
                    "ccy_for": self._forName}
                """

                option_values[i] = option_values[i] + option.value(
                    self._findate(horizon_date[i]), spot[i],
                    fx_vol_surface.get_dom_discount_curve(),
                    fx_vol_surface.get_for_discount_curve(),
                    model)[premium_output.replace('-', '_')]

                intrinsic_values[i] = intrinsic_values[i] + option.value(
                    self._findate(expiry_date[i]), spot[i],
                    fx_vol_surface.get_dom_discount_curve(),
                    fx_vol_surface.get_for_discount_curve(),
                    model)[premium_output.replace('-', '_')]
                """FinancePy returns this dictionary for deltas
                    {"pips_spot_delta": pips_spot_delta,
                    "pips_fwd_delta": pips_fwd_delta,
                    "pct_spot_delta_prem_adj": pct_spot_delta_prem_adj,
                    "pct_fwd_delta_prem_adj": pct_fwd_delta_prem_adj}
                """

                delta[i] = delta[i] + option.delta(
                    self._findate(horizon_date[i]), spot[i],
                    fx_vol_surface.get_dom_discount_curve(),
                    fx_vol_surface.get_for_discount_curve(),
                    model)[delta_output.replace('-', '_')]

        if contract_type == 'european-call':
            contract_type_fin = FinOptionTypes.EUROPEAN_CALL

            _price_option(contract_type, contract_type_fin)
        elif contract_type == 'european-put':
            contract_type_fin = FinOptionTypes.EUROPEAN_PUT

            _price_option(contract_type, contract_type_fin)
        elif contract_type == 'european-straddle' or contract_type == 'european-strangle':
            contract_type = 'european-call'
            contract_type_fin = FinOptionTypes.EUROPEAN_CALL

            _price_option(contract_type, contract_type_fin)

            contract_type = 'european-put'
            contract_type_fin = FinOptionTypes.EUROPEAN_PUT

            _price_option(contract_type, contract_type_fin)

        if return_as_df:
            option_prices_df = pd.DataFrame(index=horizon_date)

            option_prices_df[cross + '-option-price.' + field] = option_values
            option_prices_df[cross + '.' + field] = spot
            option_prices_df[cross + '-strike.' + field] = strike
            option_prices_df[cross + '-vol.' + field] = vol
            option_prices_df[cross + '-delta.' + field] = delta
            option_prices_df[cross + '.expiry-date'] = expiry_date
            option_prices_df[cross + '-intrinsic-value.' +
                             field] = intrinsic_values

            return option_prices_df

        return option_values, spot, strike, vol, delta, expiry_date, intrinsic_values
Code example #23
0
class TradingModel(object):

    #### Default parameters for outputting results from the trading model
    SAVE_FIGURES = True
    DEFAULT_PLOT_ENGINE = ChartConstants().chartfactory_default_engine
    SCALE_FACTOR = ChartConstants().chartfactory_scale_factor
    CHART_SOURCE = 'Web'

    DUMP_CSV = ''
    DUMP_PATH = datetime.date.today().strftime("%Y%m%d") + ' '
    chart = Chart(engine=DEFAULT_PLOT_ENGINE)

    logger = LoggerManager().getLogger(__name__)

    def __init__(self):
        pass

    # to be implemented by every trading strategy
    @abc.abstractmethod
    def load_parameters(self):
        """
        load_parameters - Fills parameters for the backtest, such as start-end dates, transaction costs etc. To
        be implemented by subclass.
        """
        return

    @abc.abstractmethod
    def load_assets(self):
        """
        load_assets - Loads time series for the assets to be traded and also for data for generating signals.
        """
        return

    @abc.abstractmethod
    def construct_signal(self, spot_df, spot_df2, tech_params):
        """
        construct_signal - Constructs signal from pre-loaded time series

        Parameters
        ----------
        spot_df : pandas.DataFrame
            Market time series for generating signals

        spot_df2 : pandas.DataFrame
            Market time series for generating signals (can be of a different frequency)

        tech_params : TechParams
            Parameters for generating signals
        """
        return

    ####### Generic functions for every backtest
    def construct_strategy(self, br=None):
        """
        construct_strategy - Constructs the returns for all the strategies which have been specified.

        - gets parameters from load_parameters
        - market data from load_assets

        """

        calculations = Calculations()

        # get the parameters for backtesting
        if hasattr(self, 'br'):
            br = self.br
        elif br is None:
            br = self.load_parameters()

        # get market data for backtest
        asset_df, spot_df, spot_df2, basket_dict = self.load_assets()

        if hasattr(br, 'tech_params'):
            tech_params = br.tech_params
        else:
            tech_params = TechParams()

        cumresults = pandas.DataFrame(index=asset_df.index)
        portleverage = pandas.DataFrame(index=asset_df.index)

        from collections import OrderedDict
        ret_statsresults = OrderedDict()

        # for each portfolio key calculate returns - can put parts of the portfolio in the key
        for key in basket_dict.keys():
            asset_cut_df = asset_df[[x + '.close' for x in basket_dict[key]]]
            spot_cut_df = spot_df[[x + '.close' for x in basket_dict[key]]]

            self.logger.info("Calculating " + key)

            results, backtest = self.construct_individual_strategy(
                br, spot_cut_df, spot_df2, asset_cut_df, tech_params, key)

            cumresults[results.columns[0]] = results
            portleverage[results.columns[0]] = backtest.get_porfolio_leverage()
            ret_statsresults[key] = backtest.get_portfolio_pnl_ret_stats()

            # for a key, designated as the final strategy save that as the "strategy"
            if key == self.FINAL_STRATEGY:
                self._strategy_pnl = results
                self._strategy_pnl_ret_stats = backtest.get_portfolio_pnl_ret_stats(
                )
                self._strategy_leverage = backtest.get_porfolio_leverage()
                self._strategy_signal = backtest.get_porfolio_signal()
                self._strategy_pnl_trades = backtest.get_pnl_trades()

        # get benchmark for comparison
        benchmark = self.construct_strategy_benchmark()

        cumresults_benchmark = self.compare_strategy_vs_benchmark(
            br, cumresults, benchmark)

        self._strategy_group_benchmark_ret_stats = ret_statsresults

        if hasattr(self, '_benchmark_ret_stats'):
            ret_statslist = ret_statsresults
            ret_statslist['Benchmark'] = (self._benchmark_ret_stats)
            self._strategy_group_benchmark_ret_stats = ret_statslist

        # calculate annualised returns
        years = calculations.average_by_annualised_year(
            calculations.calculate_returns(cumresults_benchmark))

        self._strategy_group_pnl = cumresults
        self._strategy_group_pnl_ret_stats = ret_statsresults
        self._strategy_group_benchmark_pnl = cumresults_benchmark
        self._strategy_group_leverage = portleverage
        self._strategy_group_benchmark_annualised_pnl = years

    def construct_individual_strategy(self, br, spot_df, spot_df2, asset_df,
                                      tech_params, key):
        """
        construct_individual_strategy - Combines the signal with asset returns to find the returns of an individual
        strategy

        Parameters
        ----------
        br : BacktestRequest
            Parameters for backtest such as start and finish dates

        spot_df : pandas.DataFrame
            Market time series for generating signals

        spot_df2 : pandas.DataFrame
            Secondary market time series for generating signals (can be of a different frequency)

        tech_params : TechParams
            Parameters for generating signals

        Returns
        -------
        cumportfolio : pandas.DataFrame
        backtest : Backtest
        """
        backtest = Backtest()

        signal_df = self.construct_signal(spot_df, spot_df2, tech_params,
                                          br)  # get trading signal
        backtest.calculate_trading_PnL(br, asset_df,
                                       signal_df)  # calculate P&L

        cumpnl = backtest.get_cumpnl()

        if br.write_csv: cumpnl.to_csv(self.DUMP_CSV + key + ".csv")

        cumportfolio = backtest.get_cumportfolio()

        if br.calc_stats:
            cumportfolio.columns = [
                key + ' ' + str(backtest.get_portfolio_pnl_desc()[0])
            ]
        else:
            cumportfolio.columns = [key]

        return cumportfolio, backtest

    def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
        """
        compare_strategy_vs_benchmark - Compares the trading strategy we are backtesting against a benchmark

        Parameters
        ----------
        br : BacktestRequest
            Parameters for backtest such as start and finish dates

        strategy_df : pandas.DataFrame
            Strategy time series

        benchmark_df : pandas.DataFrame
            Benchmark time series
        """

        include_benchmark = False
        calc_stats = False

        if hasattr(br, 'include_benchmark'):
            include_benchmark = br.include_benchmark
        if hasattr(br, 'calc_stats'): calc_stats = br.calc_stats

        if include_benchmark:
            ret_stats = RetStats()
            risk_engine = RiskEngine()
            filter = Filter()
            calculations = Calculations()

            # align strategy time series with that of benchmark
            strategy_df, benchmark_df = strategy_df.align(benchmark_df,
                                                          join='left',
                                                          axis=0)

            # if necessary apply vol target to benchmark (to make it comparable with strategy)
            if hasattr(br, 'portfolio_vol_adjust'):
                if br.portfolio_vol_adjust is True:
                    benchmark_df = risk_engine.calculate_vol_adjusted_index_from_prices(
                        benchmark_df, br=br)

            # only calculate return statistics if this has been specified (note:
            # with different frequencies of data this might underrepresent vol)
            if calc_stats:
                benchmark_df = benchmark_df.fillna(method='ffill')
                ret_stats.calculate_ret_stats_from_prices(
                    benchmark_df, br.ann_factor)
                benchmark_df.columns = ret_stats.summary()

            # realign strategy & benchmark
            strategy_benchmark_df = strategy_df.join(benchmark_df, how='inner')
            strategy_benchmark_df = strategy_benchmark_df.fillna(
                method='ffill')

            strategy_benchmark_df = filter.filter_time_series_by_date(
                br.plot_start, br.finish_date, strategy_benchmark_df)
            strategy_benchmark_df = calculations.create_mult_index_from_prices(
                strategy_benchmark_df)

            self._benchmark_pnl = benchmark_df
            self._benchmark_ret_stats = ret_stats

            return strategy_benchmark_df

        return strategy_df

    def get_strategy_name(self):
        return self.FINAL_STRATEGY

    def get_individual_leverage(self):
        return self._individual_leverage

    def get_strategy_group_pnl_trades(self):
        return self._strategy_pnl_trades

    def get_strategy_pnl(self):
        return self._strategy_pnl

    def get_strategy_pnl_ret_stats(self):
        return self._strategy_pnl_ret_stats

    def get_strategy_leverage(self):
        return self._strategy_leverage

    def get_strategy_group_benchmark_pnl(self):
        return self._strategy_group_benchmark_pnl

    def get_strategy_group_benchmark_ret_stats(self):
        return self._strategy_group_benchmark_ret_stats

    def get_strategy_group_leverage(self):
        return self._strategy_group_leverage

    def get_strategy_signal(self):
        return self._strategy_signal

    def get_benchmark(self):
        return self._benchmark_pnl

    def get_benchmark_ret_stats(self):
        return self._benchmark_ret_stats

    def get_strategy_group_benchmark_annualised_pnl(self):
        return self._strategy_group_benchmark_annualised_pnl

    #### Plotting

    def reduce_plot(self, data_frame):
        """
        reduce_plot - Reduces the frequency of a time series to every business day so it can be plotted more easily

        Parameters
        ----------
        data_frame: pandas.DataFrame
            Strategy time series

        Returns
        -------
        pandas.DataFrame
        """
        try:
            # make plots on every business day (will downsample intraday data)
            # note: recent pandas requires an aggregation after resample
            data_frame = data_frame.resample('B').mean()
            data_frame = data_frame.fillna(method='pad')

            return data_frame
        except:
            return data_frame

    ##### Quick helper functions to plot aspects of the strategy such as P&L, leverage etc.
    def plot_individual_leverage(self):

        style = self.create_style("Leverage", "Individual Leverage")

        try:
            self.chart.plot(self.reduce_plot(self._individual_leverage),
                            chart_type='line',
                            style=style)
        except:
            pass

    def plot_strategy_group_pnl_trades(self):

        style = self.create_style("(bp)", "Individual Trade PnL")

        # zero when there isn't a trade exit
        # strategy_pnl_trades = self._strategy_pnl_trades * 100 * 100
        # strategy_pnl_trades = strategy_pnl_trades.dropna()

        # note only works with single large basket trade
        try:
            strategy_pnl_trades = self._strategy_pnl_trades.fillna(
                0) * 100 * 100
            self.chart.plot(self.reduce_plot(strategy_pnl_trades),
                            chart_type='line',
                            style=style)
        except:
            pass

    def plot_strategy_pnl(self):

        style = self.create_style("", "Strategy PnL")

        try:
            self.chart.plot(self.reduce_plot(self._strategy_pnl),
                            chart_type='line',
                            style=style)
        except:
            pass

    def plot_strategy_signal_proportion(self, strip=None):

        signal = self._strategy_signal

        # count number of long, short and flat periods in our sample
        long = signal[signal > 0].count()
        short = signal[signal < 0].count()
        flat = signal[signal == 0].count()

        keys = long.index

        # how many trades have there been (ignore size of the trades)
        trades = abs(signal - signal.shift(-1))
        trades = trades[trades > 0].count()

        df_trades = pandas.DataFrame(index=keys,
                                     columns=['Trades'],
                                     data=trades)

        df = pandas.DataFrame(index=keys, columns=['Long', 'Short', 'Flat'])

        df['Long'] = long
        df['Short'] = short
        df['Flat'] = flat

        if strip is not None: keys = [k.replace(strip, '') for k in keys]

        df.index = keys
        df_trades.index = keys
        # df = df.sort_index()

        style = self.create_style("", "")

        try:
            style.file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (Strategy signal proportion).png'
            style.html_file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (Strategy signal proportion).html'
            self.chart.plot(self.reduce_plot(df),
                            chart_type='bar',
                            style=style)

            style.file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (Strategy trade no).png'
            style.html_file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (Strategy trade no).html'
            self.chart.plot(self.reduce_plot(df_trades),
                            chart_type='bar',
                            style=style)

        except:
            pass

    def plot_strategy_leverage(self):
        style = self.create_style("Leverage", "Strategy Leverage")

        try:
            self.chart.plot(self.reduce_plot(self._strategy_leverage),
                            chart_type='line',
                            style=style)
        except:
            pass

    def plot_strategy_group_benchmark_pnl(self, strip=None):

        style = self.create_style("", "Group Benchmark PnL - cumulative")

        strat_list = self._strategy_group_benchmark_pnl.columns  #.sort_values()

        for line in strat_list:
            self.logger.info(line)

        # plot cumulative line of returns
        self.chart.plot(self.reduce_plot(self._strategy_group_benchmark_pnl),
                        style=style)

        # needs write stats flag turned on
        try:
            keys = self._strategy_group_benchmark_ret_stats.keys()
            ir = []

            for key in keys:
                ir.append(
                    self._strategy_group_benchmark_ret_stats[key].inforatio()
                    [0])

            if strip is not None: keys = [k.replace(strip, '') for k in keys]

            ret_stats = pandas.DataFrame(index=keys, data=ir, columns=['IR'])
            # ret_stats = ret_stats.sort_index()
            style.file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (Group Benchmark PnL - IR) ' + str(style.scale_factor) + '.png'
            style.html_file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (Group Benchmark PnL - IR) ' + str(style.scale_factor) + '.html'
            style.display_brand_label = False

            self.chart.plot(ret_stats, chart_type='bar', style=style)

        except:
            pass

    def plot_strategy_group_benchmark_annualised_pnl(self, cols=None):
        # TODO - unfinished, needs checking!

        if cols is None:
            cols = self._strategy_group_benchmark_annualised_pnl.columns

        style = self.create_style("", "Group Benchmark Annualised PnL")
        style.color = [
            'red', 'blue', 'purple', 'gray', 'yellow', 'green', 'pink'
        ]

        self.chart.plot(self.reduce_plot(
            self._strategy_group_benchmark_annualised_pnl[cols]),
                        chart_type='line',
                        style=style)

    def plot_strategy_group_leverage(self):

        style = self.create_style("Leverage", "Group Leverage")
        self.chart.plot(self.reduce_plot(self._strategy_group_leverage),
                        chart_type='line',
                        style=style)

    def plot_strategy_signals(self, date=None, strip=None):

        ######## plot signals
        strategy_signal = self._strategy_signal
        strategy_signal = 100 * (strategy_signal)

        if date is None:
            # .ix has been removed from recent pandas; use positional .iloc
            last_day = strategy_signal.iloc[-1].transpose().to_frame()
        else:
            if not (isinstance(date, list)):
                date = [date]

            last_day = []

            for d in date:
                last_day.append(strategy_signal.loc[d].transpose().to_frame())

            last_day = pandas.concat(last_day, axis=1)
            last_day = last_day.sort_index(axis=1)

        if strip is not None:
            last_day.index = [x.replace(strip, '') for x in last_day.index]

        style = self.create_style("positions (% portfolio notional)",
                                  "Positions")
        self.chart.plot(last_day, chart_type='bar', style=style)

    def create_style(self, title, file_add):
        style = Style()

        style.title = self.FINAL_STRATEGY + " " + title
        style.display_legend = True
        style.scale_factor = self.SCALE_FACTOR
        style.source = self.CHART_SOURCE

        if self.DEFAULT_PLOT_ENGINE not in ['plotly', 'cufflinks'
                                            ] and self.SAVE_FIGURES:
            style.file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (' + file_add + ') ' + str(
                style.scale_factor) + '.png'

        style.html_file_output = self.DUMP_PATH + self.FINAL_STRATEGY + ' (' + file_add + ') ' + str(
            style.scale_factor) + '.html'

        try:
            style.silent_display = self.SILENT_DISPLAY
        except:
            pass

        return style
Code example #24
0
File: eventstudy.py Project: dmunozc/finmarketpy
class HistEconDataFactory(object):

    def __init__(self, market_data_generator = None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator

    def get_economic_data_history(self, start_date, finish_date, country_group, data_type,
        source = 'fred', cache_algo = "internet_load_return"):

        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group]['Country'])

        # construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(self._all_econ_tickers[
                                         self._all_econ_tickers["Full Code"] == pretty_ticker][source].values)

            if vendor_ticker == []:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg': vendor_fields = ['PX_LAST']

        md_request = MarketDataRequest(
                start_date = start_date,                            # start date
                finish_date = finish_date,                          # finish date
                category = 'economic',
                freq = 'daily',                                     # daily data
                data_source = source,                               # which data source to use
                cut = 'LOC',
                tickers = pretty_tickers,
                fields = ['close'],                                 # which fields to download
                vendor_tickers = vendor_tickers,
                vendor_fields = vendor_fields,                      # which vendor fields to download
                cache_algo = cache_algo)                            # how to return data

        return self.market_data_generator.fetch_market_data(md_request)

    def grasp_coded_entry(self, df, index):
        df = df.loc[index:].stack()  # .ix has been removed from recent pandas
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']

        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum(
            [list(self._econ_country_codes[self._econ_country_codes["Country"] == x]['Code']) for x in countries],
            [])

        return df
Code example #25
0
File: datavendorweb.py Project: alamtahsin/findatapy
class DataVendorALFRED(DataVendor):

    def __init__(self):
        super(DataVendorALFRED, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request ALFRED/FRED data")

        data_frame = self.download_daily(market_data_request_vendor)

        if data_frame is None or data_frame.empty: return None

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

            # tidy up tickers into a format that is more easily translatable
            # we can often get multiple fields returned (even if we don't ask for them!)
            # convert to lower case
            returned_fields = [(x.split('.')[1]) for x in returned_tickers]
            returned_tickers = [(x.split('.')[0]) for x in returned_tickers]

            try:
                fields = self.translate_from_vendor_field(returned_fields, market_data_request)
                tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)
            except Exception as e:
                self.logger.error("Could not translate tickers/fields: " + str(e))
                return None

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from ALFRED/FRED for " + str(ticker_combined))

        return data_frame

    def download_daily(self, market_data_request):
        trials = 0

        data_frame_list = []
        data_frame_release = []

        # TODO refactor this code, a bit messy at the moment!
        for i in range(0, len(market_data_request.tickers)):
            while trials < 5:
                try:
                    fred = Fred(api_key=DataConstants().fred_api_key)

                    # acceptable fields: close, actual-release, release-date-time-full
                    if 'close' in market_data_request.fields and 'release-date-time-full' in market_data_request.fields:
                        data_frame = fred.get_series_all_releases(market_data_request.tickers[i],
                                                                  observation_start=market_data_request.start_date,
                                                                  observation_end=market_data_request.finish_date)

                        data_frame.columns = ['Date', market_data_request.tickers[i] + '.release-date-time-full',
                                              market_data_request.tickers[i] + '.close']

                        data_frame = data_frame.sort_values(by=['Date', market_data_request.tickers[i] + '.release-date-time-full'])
                        data_frame = data_frame.drop_duplicates(subset=['Date'], keep='last')
                        data_frame = data_frame.set_index(['Date'])

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)
                    elif 'close' in market_data_request.fields:

                        data_frame = fred.get_series(series_id=market_data_request.tickers[i],
                                                     observation_start=market_data_request.start_date,
                                                     observation_end=market_data_request.finish_date)

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.close']
                        data_frame_list.append(data_frame)

                    if 'first-revision' in market_data_request.fields:
                        data_frame = fred.get_series_first_revision(market_data_request.tickers[i],
                                                                    observation_start=market_data_request.start_date,
                                                                    observation_end=market_data_request.finish_date)

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.first-revision']

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)

                    if 'actual-release' in market_data_request.fields and 'release-date-time-full' in market_data_request.fields:
                        data_frame = fred.get_series_all_releases(market_data_request.tickers[i],
                                                                  observation_start=market_data_request.start_date,
                                                                  observation_end=market_data_request.finish_date)

                        data_frame.columns = ['Date', market_data_request.tickers[i] + '.release-date-time-full',
                                              market_data_request.tickers[i] + '.actual-release']

                        data_frame = data_frame.sort_values(by=['Date', market_data_request.tickers[i] + '.release-date-time-full'])
                        data_frame = data_frame.drop_duplicates(subset=['Date'], keep='first')
                        data_frame = data_frame.set_index(['Date'])

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)

                    elif 'actual-release' in market_data_request.fields:
                        data_frame = fred.get_series_first_release(market_data_request.tickers[i],
                                                                   observation_start=market_data_request.start_date,
                                                                   observation_end=market_data_request.finish_date)

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.actual-release']

                        filter = Filter()
                        data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame)

                        data_frame_list.append(data_frame)

                    elif 'release-date-time-full' in market_data_request.fields:
                        data_frame = fred.get_series_all_releases(market_data_request.tickers[i],
                                                                  observation_start=market_data_request.start_date,
                                                                  observation_end=market_data_request.finish_date)

                        data_frame = data_frame['realtime_start']

                        data_frame = pandas.DataFrame(data_frame)
                        data_frame.columns = [market_data_request.tickers[i] + '.release-date-time-full']

                        data_frame.index = data_frame[market_data_request.tickers[i] + '.release-date-time-full']
                        data_frame = data_frame.sort_index()
                        data_frame = data_frame.drop_duplicates()

                        filter = Filter()
                        data_frame_release.append(filter.filter_time_series_by_date(market_data_request.start_date,
                                                                       market_data_request.finish_date, data_frame))

                    break
                except Exception as e:
                    trials = trials + 1
                    self.logger.info("Attempt " + str(trials) + " to download from ALFRED/FRED: " + str(e))

            if trials == 5:
                self.logger.error("Couldn't download from ALFRED/FRED after several attempts!")

        calc = Calculations()

        data_frame1 = calc.pandas_outer_join(data_frame_list)
        data_frame2 = calc.pandas_outer_join(data_frame_release)

        data_frame = pandas.concat([data_frame1, data_frame2], axis=1)

        return data_frame
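
The retry loop above is the core pattern for tolerating transient ALFRED/FRED outages. Below is a minimal standalone sketch of the same idea, assuming the fredapi package is installed and that FRED_API_KEY holds a valid key (the key and the example series/dates are hypothetical placeholders); it is an illustration, not findatapy's actual implementation.

# Minimal sketch of the retry pattern above; assumes the fredapi package
# is installed and FRED_API_KEY holds a valid key (hypothetical placeholder)
import pandas

from fredapi import Fred

FRED_API_KEY = "your-api-key-here"


def download_with_retries(series_id, start, end, max_trials=5):
    """Download a single FRED series as a DataFrame, retrying on failure."""
    fred = Fred(api_key=FRED_API_KEY)

    for trial in range(1, max_trials + 1):
        try:
            # get_series returns a pandas Series indexed by observation date
            s = fred.get_series(series_id,
                                observation_start=start,
                                observation_end=end)

            df = pandas.DataFrame(s)
            df.columns = [series_id + '.close']

            return df
        except Exception as e:
            print("Attempt " + str(trial) + " failed: " + str(e))

    raise RuntimeError("Couldn't download " + series_id
                       + " after " + str(max_trials) + " attempts")


# Usage (hypothetical series/dates):
# download_with_retries('PAYEMS', '2019-01-01', '2020-01-01')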
Code example #26
from findatapy.market import Market, MarketDataGenerator, MarketDataRequest
from findatapy.timeseries import Calculations
from findatapy.util import LoggerManager

from finmarketpy.economics import EventStudy

# choose run_example = 0 for everything
# run_example = 1 - download recent NFP times and do event study for USD/JPY

run_example = 0

###### download recent NFP times and do event study for USD/JPY (using Bloomberg data)
if run_example == 1 or run_example == 0:

    logger = LoggerManager().getLogger(__name__)

    import datetime

    from datetime import timedelta

    ###### Get intraday data for USD/JPY from the past few months from Bloomberg, NFP date/times from Bloomberg
    ###### then plot intraday price action around NFP for USD/JPY

    start_date = datetime.date.today() - timedelta(days=180)
    finish_date = datetime.datetime.utcnow()

    market = Market(market_data_generator=MarketDataGenerator())

    # Fetch NFP times from Bloomberg
    md_request = MarketDataRequest(
Code example #27
File: datavendorweb.py Project: alamtahsin/findatapy
class DataVendorBOE(DataVendor):

    def __init__(self):
        super(DataVendorBOE, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        market_data_request_vendor = self.construct_vendor_market_data_request(market_data_request)

        self.logger.info("Request BOE data")

        data_frame = self.download_daily(market_data_request_vendor)

        if data_frame is None or len(data_frame.index) == 0: return None

        # Convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            returned_tickers = data_frame.columns

            # Tidy up tickers into a format that is more easily translatable;
            # we can often get multiple fields returned (even if we don't ask
            # for them!), converting fields to lower case
            returned_fields = [(x.split(' - ')[1]).lower().replace(' ', '-') for x in returned_tickers]
            returned_fields = [x.replace('value', 'close') for x in returned_fields]  # special case for close

            returned_tickers = [x.replace('.', '/') for x in returned_tickers]
            returned_tickers = [x.split(' - ')[0] for x in returned_tickers]

            fields = self.translate_from_vendor_field(returned_fields, market_data_request)
            tickers = self.translate_from_vendor_ticker(returned_tickers, market_data_request)

            ticker_combined = [ticker + "." + field
                               for ticker, field in zip(tickers, fields)]

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        self.logger.info("Completed request from BOE.")

        return data_frame

    def download_daily(self, market_data_request):
        trials = 0

        data_frame = None

        while trials < 5:
            try:
                # TODO implement the actual BOE download here

                break
            except Exception as e:
                trials = trials + 1
                self.logger.info("Attempt " + str(trials) + " to download from BOE: " + str(e))

        if trials == 5:
            self.logger.error("Couldn't download from ONS after several attempts!")

        return data_frame
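
For reference, here is the column tidy-up from load_ticker above as a standalone snippet, so the string transformations are easy to follow; the sample BOE column names below are made up for illustration.

# Standalone sketch of the BOE column tidy-up in load_ticker above;
# the sample column names below are hypothetical
raw_columns = ['XUDLERS - Daily Value', 'IUDSOIA.A - Annual Average']

# Field = text after ' - ', lower-cased, spaces to hyphens,
# with 'value' mapped to the findatapy convention 'close'
fields = [c.split(' - ')[1].lower().replace(' ', '-') for c in raw_columns]
fields = [f.replace('value', 'close') for f in fields]

# Ticker = text before ' - ', with '.' swapped for '/'
tickers = [c.replace('.', '/').split(' - ')[0] for c in raw_columns]

print(fields)   # ['daily-close', 'annual-average']
print(tickers)  # ['XUDLERS', 'IUDSOIA/A']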
Code example #28
    def get_data_vendor(self, source):
        """Loads appropriate data service class

        Parameters
        ----------
        source : str
            the data service to use "bloomberg", "quandl", "yahoo", "google", "fred" etc.
            we can also have forms like "bloomberg-boe" separated by hyphens

        Returns
        -------
        DataVendor
        """
        logger = LoggerManager().getLogger(__name__)

        data_vendor = None

        try:
            # Forms like "bloomberg-boe" resolve to the vendor before the hyphen
            source = source.split("-")[0]
        except Exception:
            logger.error("Was a data source specified?")

            return None

        if source == 'bloomberg':
            try:
                from findatapy.market.datavendorbbg import DataVendorBBGOpen
                data_vendor = DataVendorBBGOpen()
            except Exception:
                logger.warning("Bloomberg needs to be installed")

        elif source == 'quandl':
            from findatapy.market.datavendorweb import DataVendorQuandl
            data_vendor = DataVendorQuandl()

        elif source == 'eikon':
            from findatapy.market.datavendorweb import DataVendorEikon
            data_vendor = DataVendorEikon()

        elif source == 'ons':
            from findatapy.market.datavendorweb import DataVendorONS
            data_vendor = DataVendorONS()

        elif source == 'boe':
            from findatapy.market.datavendorweb import DataVendorBOE
            data_vendor = DataVendorBOE()

        elif source == 'dukascopy':
            from findatapy.market.datavendorweb import DataVendorDukasCopy
            data_vendor = DataVendorDukasCopy()

        elif source == 'fxcm':
            from findatapy.market.datavendorweb import DataVendorFXCM
            data_vendor = DataVendorFXCM()

        elif source == 'alfred':
            from findatapy.market.datavendorweb import DataVendorALFRED
            data_vendor = DataVendorALFRED()

        elif source == 'yahoo':
            from findatapy.market.datavendorweb import DataVendorYahoo
            data_vendor = DataVendorYahoo()

        elif source in ['google', 'fred', 'oecd', 'eurostat', 'edgar-index']:
            from findatapy.market.datavendorweb import DataVendorPandasWeb
            data_vendor = DataVendorPandasWeb()

        elif source == 'bitcoincharts':
            from findatapy.market.datavendorweb import DataVendorBitcoincharts
            data_vendor = DataVendorBitcoincharts()
        elif source == 'poloniex':
            from findatapy.market.datavendorweb import DataVendorPoloniex
            data_vendor = DataVendorPoloniex()
        elif source == 'binance':
            from findatapy.market.datavendorweb import DataVendorBinance
            data_vendor = DataVendorBinance()
        elif source == 'bitfinex':
            from findatapy.market.datavendorweb import DataVendorBitfinex
            data_vendor = DataVendorBitfinex()
        elif source == 'gdax':
            from findatapy.market.datavendorweb import DataVendorGdax
            data_vendor = DataVendorGdax()
        elif source == 'kraken':
            from findatapy.market.datavendorweb import DataVendorKraken
            data_vendor = DataVendorKraken()
        elif source == 'bitmex':
            from findatapy.market.datavendorweb import DataVendorBitmex
            data_vendor = DataVendorBitmex()
        elif '.csv' in source or '.h5' in source or '.parquet' in source:
            from findatapy.market.datavendorweb import DataVendorFlatFile
            data_vendor = DataVendorFlatFile()
        elif source == 'alphavantage':
            from findatapy.market.datavendorweb import DataVendorAlphaVantage
            data_vendor = DataVendorAlphaVantage()
        elif source == 'huobi':
            from findatapy.market.datavendorweb import DataVendorHuobi
            data_vendor = DataVendorHuobi()

        # TODO add support for other data sources (like Reuters)

        return data_vendor
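
The long elif chain above could equally be table-driven; here is a sketch under that assumption (the module paths and class names are copied from the snippet, but the registry approach itself is only an illustration, not findatapy's actual implementation).

# Sketch of a table-driven alternative to the elif chain above; the
# registry approach is illustrative, not findatapy's actual code
import importlib

_VENDOR_REGISTRY = {
    'quandl': ('findatapy.market.datavendorweb', 'DataVendorQuandl'),
    'eikon': ('findatapy.market.datavendorweb', 'DataVendorEikon'),
    'boe': ('findatapy.market.datavendorweb', 'DataVendorBOE'),
    'alfred': ('findatapy.market.datavendorweb', 'DataVendorALFRED'),
}


def get_data_vendor(source):
    # Forms like "bloomberg-boe" resolve to the vendor before the hyphen
    source = source.split('-')[0]

    try:
        module_name, class_name = _VENDOR_REGISTRY[source]
    except KeyError:
        return None

    # Import lazily, mirroring the per-branch imports in the original
    module = importlib.import_module(module_name)

    return getattr(module, class_name)()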
Code example #29
File: datavendorweb.py Project: alamtahsin/findatapy
 def __init__(self):
     super(DataVendorPandasWeb, self).__init__()
     self.logger = LoggerManager().getLogger(__name__)
Code example #30
File: twitter.py Project: vishalbelsare/findatapy
 def __init__(self, *args, **kwargs):
     self.logger = LoggerManager().getLogger(__name__)
Code example #31
File: market.py Project: prasunanand/findatapy
    def _get_individual_fx_cross(self, market_data_request):
        cr = market_data_request.cross
        type = market_data_request.type
        freq = market_data_request.freq

        base = cr[0:3]
        terms = cr[3:6]

        if (type == 'spot'):
            # Non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # Download base USD cross
                market_data_request.tickers = base_USD
                market_data_request.category = 'fx'

                base_vals = self._market_data_generator.fetch_market_data(
                    market_data_request)

                # Download terms USD cross
                market_data_request.tickers = terms_USD
                market_data_request.category = 'fx'

                terms_vals = self._market_data_generator.fetch_market_data(
                    market_data_request)

                # If quoted USD/base flip to get USD terms
                if (base_USD[0:3] == 'USD'):
                    base_vals = 1 / base_vals

                # If quoted USD/terms flip to get USD terms
                if (terms_USD[0:3] == 'USD'):
                    terms_vals = 1 / terms_vals

                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']

                cross_vals = base_vals.div(terms_vals, axis='index')
                cross_vals.columns = [cr + '.close']

                base_vals.columns = [base_USD + '.close']
                terms_vals.columns = [terms_USD + '.close']
            else:
                # if base == 'USD': non_USD = terms
                # if terms == 'USD': non_USD = base

                correct_cr = self.fxconv.correct_notation(cr)

                market_data_request.tickers = correct_cr
                market_data_request.category = 'fx'

                cross_vals = self._market_data_generator.fetch_market_data(
                    market_data_request)

                # Special case for USDUSD!
                if base + terms == 'USDUSD':
                    if freq == 'daily':
                        cross_vals = pd.DataFrame(1,
                                                  index=cross_vals.index,
                                                  columns=cross_vals.columns)
                        filter = Filter()
                        cross_vals = filter.filter_time_series_by_holidays(
                            cross_vals, cal='WEEKDAY')
                else:
                    # Flip if not convention (eg. JPYUSD)
                    if (correct_cr != cr):
                        cross_vals = 1 / cross_vals

                # cross_vals = self._market_data_generator.harvest_time_series(market_data_request)
                cross_vals.columns = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'daily':
                # Download base USD cross (we are already inside the "tot"
                # branch, so the inner type checks are redundant)
                market_data_request.tickers = base + 'USD'
                market_data_request.category = 'fx-' + type

                base_vals = self._market_data_generator.fetch_market_data(
                    market_data_request)

                # Download terms USD cross
                market_data_request.tickers = terms + 'USD'
                market_data_request.category = 'fx-' + type

                terms_vals = self._market_data_generator.fetch_market_data(
                    market_data_request)

                # base_rets = self._calculations.calculate_returns(base_vals)
                # terms_rets = self._calculations.calculate_returns(terms_vals)

                # Special case when the cross is USDUSD (or when base or
                # terms is USD)
                if base + terms == 'USDUSD':
                    base_rets = self._calculations.calculate_returns(base_vals)
                    cross_rets = pd.DataFrame(0,
                                              index=base_rets.index,
                                              columns=base_rets.columns)
                elif base + 'USD' == 'USDUSD':
                    cross_rets = -self._calculations.calculate_returns(
                        terms_vals)
                elif terms + 'USD' == 'USDUSD':
                    cross_rets = self._calculations.calculate_returns(
                        base_vals)
                else:
                    base_rets = self._calculations.calculate_returns(base_vals)
                    terms_rets = self._calculations.calculate_returns(
                        terms_vals)

                    cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # First return of a time series will be NaN, given we don't
                # know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self._calculations.create_mult_index(cross_rets)
                cross_vals.columns = [cr + '-' + type + '.close']

            elif freq == 'intraday':
                LoggerManager().getLogger(__name__).info(
                    'Total calculated returns for intraday not implemented yet'
                )
                return None

        return cross_vals
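
The spot branch above triangulates a non-USD cross from two USD legs, cross = (base leg per USD) / (terms leg per USD), flipping any USD-first quote into a per-USD value first. A small numeric sketch with made-up rates:

# Numeric sketch of the spot triangulation above, with made-up rates:
# a GBPJPY cross built from GBPUSD and USDJPY quotes
gbpusd = 1.25          # GBP/USD, already in market convention
usdjpy = 150.0         # USD/JPY, quoted USD-first

# As in the code above, any USD-first quote is flipped to a per-USD value
jpyusd = 1 / usdjpy    # USD value of 1 JPY

# cross = (base in USD) / (terms in USD)
gbpjpy = gbpusd / jpyusd

print(gbpjpy)          # 187.5 = 1.25 * 150.0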
Code example #32
File: eventstudy.py Project: dmunozc/finmarketpy
 def __init__(self):
     self.logger = LoggerManager().getLogger(__name__)