Code Example #1
    def load_assets(self, br = None):
        ##### FILL IN WITH YOUR ASSET DATA
        from findatapy.util.loggermanager import  LoggerManager
        logger = LoggerManager().getLogger(__name__)

        # for FX basket
        full_bkt    = ['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                       'NZDUSD', 'USDCHF', 'USDNOK', 'USDSEK']

        basket_dict = {}

        for ticker in full_bkt:
            basket_dict[ticker] = [ticker]

        basket_dict['FX trend'] = full_bkt

        br = self.load_parameters(br = br)

        logger.info("Loading asset data...")

        vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXJPUS', 'FRED/DEXUSUK', 'FRED/DEXUSAL', 'FRED/DEXCAUS',
                          'FRED/DEXUSNZ', 'FRED/DEXSZUS', 'FRED/DEXNOUS', 'FRED/DEXSDUS']

        market_data_request = MarketDataRequest(
                    start_date = br.start_date,                     # start date
                    finish_date = br.finish_date,                   # finish date
                    freq = 'daily',                                 # daily data
                    data_source = 'quandl',                         # use Quandl as data source
                    tickers = full_bkt,                             # ticker (Thalesians)
                    fields = ['close'],                                 # which fields to download
                    vendor_tickers = vendor_tickers,                    # ticker (Quandl)
                    vendor_fields = ['close'],                          # which vendor (Quandl) fields to download
                    cache_algo = 'cache_algo_return')                # how to return data

        asset_df = self.market.fetch_market(market_data_request)

        # if web connection fails read from CSV
        if asset_df is None:
            import pandas

            # dates are ISO formatted, so pandas' default parser handles them
            # (pandas.datetime was removed in newer pandas, so avoid a custom date_parser)
            asset_df = pandas.read_csv("d:/fxcta.csv", index_col=0,
                                       parse_dates=['Date'])

        # signalling variables
        spot_df = asset_df
        spot_df2 = None

        # asset_df

        return asset_df, spot_df, spot_df2, basket_dict
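
For orientation, the request above can be reproduced standalone. The sketch below builds a smaller version of the same MarketDataRequest and fetches it through findatapy's Market wrapper (a minimal sketch, assuming findatapy is installed and a Quandl API key is configured; the dates and ticker subset are illustrative).

# Minimal sketch: fetch part of the FX basket used above via findatapy
from findatapy.market import Market, MarketDataGenerator, MarketDataRequest

market = Market(market_data_generator=MarketDataGenerator())

md_request = MarketDataRequest(
    start_date='01 Jan 2015', finish_date='01 Jan 2016',   # illustrative dates
    freq='daily', data_source='quandl',
    tickers=['EURUSD', 'USDJPY'],
    vendor_tickers=['FRED/DEXUSEU', 'FRED/DEXJPUS'],
    fields=['close'], vendor_fields=['close'],
    cache_algo='cache_algo_return')

# returns a DataFrame with columns like EURUSD.close, USDJPY.close
asset_df = market.fetch_market(md_request)
print(asset_df.tail())
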
Code Example #2
    def __init__(self, engine = ChartConstants().chartfactory_default_engine):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.SCALE_FACTOR = 3
        self.DEFAULT_PLOT_ENGINE = engine
        self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE)

        return
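
A rough usage sketch for the charting setup above. The enclosing class is not shown, so chartpy's Chart is exercised directly; the df=/chart_type= keyword arguments follow chartpy's README examples and the sample data is made up, so treat this as an assumption-laden sketch rather than the library's canonical API.

# Hedged sketch: drive chartpy's Chart directly with dummy data
import pandas
from chartpy import Chart

df = pandas.DataFrame({'EURUSD': [1.10, 1.11, 1.12]},
                      index=pandas.date_range('2020-01-01', periods=3))

# engine= mirrors the constructor call above; 'matplotlib' is one of chartpy's engines
chart = Chart(df=df, chart_type='line', engine='matplotlib')
chart.plot()
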
Code Example #3
    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None

        # output parameters for backtest (should we add returns statistics on legends, write CSVs with returns etc.)
        self.__plot_start = None
        self.__calc_stats = True
        self.__write_csv = False
        self.__write_csv_pnl = False
        self.__plot_interim = False
        self.__include_benchmark = False

        self.__tech_params = TechParams()

        # default parameters for portfolio level vol adjustment
        self.__portfolio_vol_adjust = False
        self.__portfolio_vol_period_shift = 0
        self.__portfolio_vol_rebalance_freq = None
        self.__portfolio_vol_resample_freq = None
        self.__portfolio_vol_resample_type = 'mean'
        self.__portfolio_vol_target = 0.1           # 10% vol target
        self.__portfolio_vol_max_leverage = None
        self.__portfolio_vol_periods = 20
        self.__portfolio_vol_obs_in_year = 252

        # default parameters for signal level vol adjustment
        self.__signal_vol_adjust = False
        self.__signal_vol_period_shift = 0
        self.__signal_vol_rebalance_freq = None
        self.__signal_vol_resample_freq = None      
        self.__signal_vol_resample_type = 'mean'
        self.__signal_vol_target = 0.1              # 10% vol target
        self.__signal_vol_max_leverage = None
        self.__signal_vol_periods = 20
        self.__signal_vol_obs_in_year = 252

        # portfolio notional size
        self.__portfolio_notional_size = None
        self.__portfolio_combination = None
        self.__portfolio_combination_weights = None
        
        # parameters for maximum position limits (expressed as whole portfolio)
        self.__max_net_exposure = None
        self.__max_abs_exposure = None

        self.__position_clip_rebalance_freq = None
        self.__position_clip_resample_freq = None  # by default apply max position criterion on last business day of month
        self.__position_clip_resample_type = 'mean'
        self.__position_clip_period_shift = 0

        # take profit and stop loss parameters
        self.__take_profit = None
        self.__stop_loss = None

        # should we delay the signal?
        self.__signal_delay = 0
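
The double-underscore attributes above are name-mangled, so callers normally reach them through properties rather than directly. A generic sketch of that pattern for one field (illustrative only, not the library's actual code):

# Illustrative property pattern for name-mangled backtest parameters
class BacktestRequestSketch(object):
    def __init__(self):
        # stored internally as _BacktestRequestSketch__signal_delay
        self.__signal_delay = 0

    @property
    def signal_delay(self):
        return self.__signal_delay

    @signal_delay.setter
    def signal_delay(self, signal_delay):
        self.__signal_delay = signal_delay

br = BacktestRequestSketch()
br.signal_delay = 2      # callers use the public property, not the mangled name
print(br.signal_delay)   # -> 2
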
Code Example #4
    def process_message(self, msg):

        constants = DataConstants()
        # Process received events

        # SLOW loop (careful, not all the fields will be returned every time hence need to include the field name in the tuple)
        # perhaps try to run in parallel?
        logger = LoggerManager().getLogger(__name__)

        implementation = 'simple'

        if implementation == 'simple':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            data = defaultdict(dict)
            # FASTER: avoid repeated getValue/getElement calls into blpapi
            # (they are very slow); cache intermediate values instead
            for i in range(fieldData.numValues()):
                mini_field_data = fieldData.getValue(i)
                date = mini_field_data.getElement(0).getValue()

                for j in range(1, mini_field_data.numElements()):
                    field_value = mini_field_data.getElement(j)

                    data[(str(field_value.name()),
                          ticker)][date] = field_value.getValue()

            # ORIGINAL repeated calling getValue/getElement much slower
            # for i in range(fieldData.numValues()):
            #     for j in range(1, fieldData.getValue(i).numElements()):
            #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
            #             = fieldData.getValue(i).getElement(j).getValue()
        elif implementation == 'py4j':
            pass

            # TODO Py4J
            # from findatapy.market.bbgloop import bbgloop
            # from py4j.java_gateway import JavaGateway

            # gateway = JavaGateway()
            # data = gateway.entry_point.parseFieldDataArray(msg)
        elif implementation == 'cython':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop import bbgloop

            data = bbgloop(fieldData, ticker)
        elif implementation == 'numba':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop_numba import bbgloop_numba

            data = bbgloop_numba(fieldData, ticker)
            # TODO cython

        data_frame = pd.DataFrame(data)

        # an obsolete ticker can return no values
        if not data_frame.empty:
            # data_frame.columns = pd.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pd.to_datetime(data_frame.index)
            logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) +
                        ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame
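
The commented-out MultiIndex line above hints at the intended column layout. A standalone pandas sketch (with dummy values) showing how a dict keyed by (field, ticker) tuples becomes a frame with MultiIndex columns:

# Sketch: (field, ticker)-keyed dict -> DataFrame with MultiIndex columns
import datetime
from collections import defaultdict
import pandas as pd

data = defaultdict(dict)
data[('PX_LAST', 'EURUSD Curncy')][datetime.date(2020, 1, 2)] = 1.1172
data[('PX_LAST', 'EURUSD Curncy')][datetime.date(2020, 1, 3)] = 1.1160

data_frame = pd.DataFrame(data)
data_frame.columns = pd.MultiIndex.from_tuples(data_frame.columns,
                                               names=['field', 'ticker'])
data_frame.index = pd.to_datetime(data_frame.index)
print(data_frame)
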
Code Example #5
    def load_ticker(self, market_data_request):
        """Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        constants = DataConstants()

        market_data_request = MarketDataRequest(md_request=market_data_request)
        market_data_request_vendor = self.construct_vendor_market_data_request(
            market_data_request)

        data_frame = None

        logger = LoggerManager().getLogger(__name__)
        logger.info("Request Bloomberg data")

        # Do we need daily or intraday data?
        if (market_data_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # Work out the fields which need to be downloaded via Bloomberg ref request (BDP) and
            # those that can be downloaded via Historical request (BDH)
            ref_fields = []
            ref_vendor_fields = []

            # Get user defined list of BBG fields/vendor fields which need to be downloaded by BDP
            bbg_ref_fields = list(constants.bbg_ref_fields.keys())
            bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

            for i in range(0, len(market_data_request.fields)):
                if market_data_request.fields[i] in bbg_ref_fields \
                        or market_data_request_vendor.fields[i] in bbg_ref_vendor_fields:
                    ref_fields.append(market_data_request.fields[i])
                    ref_vendor_fields.append(
                        market_data_request_vendor.fields[i])

            non_ref_fields = []
            non_ref_vendor_fields = []

            for i in range(0, len(market_data_request.fields)):
                if market_data_request.fields[i] not in bbg_ref_fields \
                        and market_data_request_vendor.fields[i] not in bbg_ref_vendor_fields:
                    non_ref_fields.append(market_data_request.fields[i])
                    non_ref_vendor_fields.append(
                        market_data_request_vendor.fields[i])

            # For certain cases, need to use ReferenceDataRequest
            # eg. for events times/dates, last tradeable date fields (when specified)
            if len(ref_fields) > 0:

                # Careful: make sure you copy the market data request object (when threading, altering that can
                # cause concurrency issues!)
                old_fields = copy.deepcopy(market_data_request.fields)
                old_vendor_fields = copy.deepcopy(
                    market_data_request_vendor.fields)

                # md_request = MarketDataRequest(md_request=market_data_request_copy)

                market_data_request.fields = ref_fields
                market_data_request.vendor_fields = ref_vendor_fields
                market_data_request_vendor = self.construct_vendor_market_data_request(
                    market_data_request)

                # Just select those reference fields to download via reference
                datetime_data_frame = self.get_reference_data(
                    market_data_request_vendor, market_data_request)

                # Download all the other event or non-ref fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(non_ref_fields) > 0:

                    market_data_request.fields = non_ref_fields
                    market_data_request.vendor_fields = non_ref_vendor_fields
                    market_data_request_vendor = self.construct_vendor_market_data_request(
                        market_data_request)

                    events_data_frame = self.get_daily_data(
                        market_data_request, market_data_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pd.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                market_data_request.fields = copy.deepcopy(old_fields)
                market_data_request_vendor.fields = copy.deepcopy(
                    old_vendor_fields)

            # For all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(market_data_request,
                                                 market_data_request_vendor)

                # if data_frame is not None:
                #     # Convert fields with release-dt to dates (special case!) and assume everything else numerical
                #     for c in data_frame.columns:
                #         try:
                #             if 'release-dt' in c:
                #                 data_frame[c] = (data_frame[c]).astype('int').astype(str).apply(
                #                         lambda x: pd.to_datetime(x, format='%Y%m%d'))
                #             else:
                #                 data_frame[c] = pd.to_numeric(data_frame[c])
                #         except:
                #             pass

        # Assume one ticker only for intraday data and use IntradayDataRequest to Bloomberg
        if (market_data_request.freq
                in ['tick', 'intraday', 'second', 'minute', 'hourly']):
            market_data_request_vendor.tickers = market_data_request_vendor.tickers[
                0]

            if market_data_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(market_data_request_vendor)
            else:
                data_frame = self.download_intraday(market_data_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        logger.info("No tickers returned for: " +
                                    market_data_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                # tz_localize raises TypeError if the index is already
                # tz-aware, in which case convert to UTC instead
                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except TypeError:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = market_data_request.tickers[0] + "." + cols
                data_frame.columns = cols

        logger.info("Completed request from Bloomberg.")

        return data_frame
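
The timezone handling near the end relies on a common pandas idiom: tz_localize raises TypeError on an index that is already tz-aware, in which case tz_convert is used instead. A self-contained sketch of that idiom:

# Sketch of the UTC normalisation idiom used above
import pandas as pd
import pytz

def to_utc(df):
    try:
        return df.tz_localize(pytz.utc)   # naive index: attach UTC
    except TypeError:
        return df.tz_convert(pytz.utc)    # tz-aware index: convert to UTC

naive = pd.DataFrame({'mid': [1.10]},
                     index=pd.to_datetime(['2020-01-02 14:00']))
aware = naive.tz_localize('Europe/London')

print(to_utc(naive).index.tz, to_utc(aware).index.tz)  # UTC UTC
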
Code Example #6
    def write_time_series_cache_to_disk(
            self,
            fname,
            data_frame,
            engine='hdf5_fixed',
            append_data=False,
            db_server=constants.db_server,
            db_port=constants.db_port,
            username=constants.db_username,
            password=constants.db_password,
            filter_out_matching=None,
            timeout=10,
            use_cache_compression=constants.use_cache_compression,
            parquet_compression=constants.parquet_compression,
            md_request=None,
            ticker=None):
        """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic

        Parmeters
        ---------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this
            'hdf5_table' - use HDF5 table format, slower but can append to
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for arctic (default: '127.0.0.1')
        timeout : int
            Number of seconds to do timeout
        """

        logger = LoggerManager().getLogger(__name__)

        if md_request is not None:
            fname = self.path_join(
                fname, md_request.create_category_key(ticker=ticker))

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters (which bcolz can't deal with) to substitutes
            data_frame.columns = self.find_replace_chars(
                data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index,
                                                    unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif (engine == 'redis'):

            fname = os.path.basename(fname).replace('.', '_')

            # Will fail if the Redis server is not running or reachable
            try:
                r = redis.StrictRedis(host=db_server,
                                      port=db_port,
                                      db=0,
                                      socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                ping = r.ping()

                # If Redis is alive, try pushing to it
                if ping:
                    if data_frame is not None:
                        if isinstance(data_frame, pandas.DataFrame):
                            mem = data_frame.memory_usage(deep=True).sum()
                            mem_float = round(
                                float(mem) / (1024.0 * 1024.0), 3)

                            if mem_float < 500:
                                # msgpack/blosc is deprecated
                                # r.set(fname, data_frame.to_msgpack(compress='blosc'))

                                # now uses pyarrow
                                context = pa.default_serialization_context()

                                ser = context.serialize(data_frame).to_buffer()

                                if use_cache_compression:
                                    comp = pa.compress(ser,
                                                       codec='lz4',
                                                       asbytes=True)
                                    siz = len(ser)  # uncompressed size, encoded in the key

                                    r.set('comp_' + str(siz) + '_' + fname,
                                          comp)
                                else:
                                    r.set(fname, ser.to_pybytes())

                                logger.info("Pushed " + fname + " to Redis")
                            else:
                                logger.warning("Did not push " + fname +
                                               " to Redis, given its size")
                    else:
                        logger.info("Object " + fname +
                                    " is empty, not pushed to Redis.")
                else:
                    logger.warning("Did not push " + fname +
                                   " to Redis as it is not running")

            except Exception as e:
                logger.warning("Couldn't push " + fname + " to Redis: " +
                               str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" +
                    str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) +
                                        ":" + str(db_port),
                                        connect=False)

            store = Arctic(c,
                           socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                logger.info("Created MongoDB library: " + fname)
            else:
                logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # Problems with Arctic when writing timezone to disk sometimes, so strip
            data_frame = data_frame.copy().tz_localize(None)

            try:
                # Can duplicate values if we have existing dates
                if append_data:
                    library.append(fname, data_frame)
                else:
                    library.write(fname, data_frame)

                c.close()
                logger.info("Written MongoDB library: " + fname)
            except Exception as e:
                logger.warning("Couldn't write MongoDB library: " + fname +
                               " " + str(e))

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is much slower than fixed format)
            # removes duplicated entries at the end
            if append_data:
                store = pandas.HDFStore(h5_filename,
                                        format=hdf5_format,
                                        complib="zlib",
                                        complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                # get last row which matches and remove everything after that
                # (because the append function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while (i > 0):
                    read_index = store.select('data', start=i,
                                              stop=nrows).index[0]

                    if (read_index <= last_point): break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data',
                          value=data_frame,
                          format=hdf5_format,
                          append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp,
                                        complib="zlib",
                                        complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

            logger.info("Written HDF5: " + fname)

        elif (engine == 'parquet'):
            if '.parquet' not in fname:
                if fname[-5:] != '.gzip':
                    fname = fname + '.parquet'

            self.to_parquet(data_frame,
                            fname,
                            aws_region=constants.aws_region,
                            parquet_compression=parquet_compression)
            # data_frame.to_parquet(fname, compression=parquet_compression)

            logger.info("Written Parquet: " + fname)
        elif engine == 'csv':
            if '.csv' not in fname:
                fname = fname + '.csv'

            data_frame.to_csv(fname)

            logger.info("Written CSV: " + fname)
Code Example #7
class IOEngine(object):
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self,
                                   fname,
                                   sheet,
                                   data_frame,
                                   create_new=False):
        """
        write_time_series_to_excel - writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if create_new or not os.path.isfile(fname):
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            # xlsxwriter cannot open an existing workbook, so append
            # sheets to the existing file via openpyxl instead
            writer = pandas.ExcelWriter(fname, engine='openpyxl', mode='a')

        data_frame.to_excel(writer, sheet_name=sheet)

        # close() saves the file (ExcelWriter.save was removed in newer pandas)
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """
        write_time_series_to_excel_writer - writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet_name=sheet)

    def read_excel_data_frame(self,
                              f_name,
                              excel_sheet,
                              freq,
                              cutoff=None,
                              dateparse=None,
                              postfix='.close',
                              intraday_tz='UTC'):
        """
        read_excel_data_frame - Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str
            timezone of file if uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name,
                                        freq,
                                        cutoff=cutoff,
                                        dateparse=dateparse,
                                        postfix=postfix,
                                        intraday_tz=intraday_tz,
                                        excel_sheet=excel_sheet)

    def remove_time_series_cache_on_disk(self,
                                         fname,
                                         engine='hdf5_fixed',
                                         db_server='127.0.0.1'):

        if 'hdf5' in engine:
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            pass
        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 10 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            c = pymongo.MongoClient(db_server, connect=False)
            store = Arctic(c,
                           socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS)
            store.delete_library(fname)

            c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self,
                                        fname,
                                        data_frame,
                                        engine='hdf5_fixed',
                                        append_data=False,
                                        db_server='127.0.0.1',
                                        filter_out_matching=None):
        """
        write_time_series_cache_to_disk - writes a Pandas DataFrame to disk in HDF5 or bcolz format, or to Arctic/MongoDB

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        """

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters (which bcolz can't deal with) to substitutes
            data_frame.columns = self.find_replace_chars(
                data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index,
                                                    unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            c = pymongo.MongoClient(db_server, connect=False)
            store = Arctic(c,
                           socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                self.logger.info("Created MongoDB library: " + fname)
            else:
                self.logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            self.logger.info("Written MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is much slower than fixed format)
            # removes duplicated entries at the end
            if append_data:
                store = pandas.HDFStore(h5_filename,
                                        format=hdf5_format,
                                        complib="blosc",
                                        complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                # get last row which matches and remove everything after that
                # (because the append function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while (i > 0):
                    read_index = store.select('data', start=i,
                                              stop=nrows).index[0]

                    if (read_index <= last_point): break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data',
                          value=data_frame,
                          format=hdf5_format,
                          append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp,
                                        format=hdf5_format,
                                        complib="blosc",
                                        complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

    def get_h5_filename(self, fname):
        """
        get_h5_filename - Ensures the filename has a .h5 extension, appending it if missing

        Parameters
        ----------
        fname : str
            filename (with or without the .h5 extension)

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """
        get_bcolz_filename - Ensures the filename has a .bcolz extension, appending it if missing

        Parameters
        ----------
        fname : str
            filename (with or without the .bcolz extension)

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """
        write_r_compatible_hdf_dataframe - Writes a DataFrame to disk as an R-compatible HDF5 file

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.values

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        # list() avoids numpy element-wise concatenation if fields is an ndarray
        data_frame32 = data_frame32[[
            'Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'
        ] + list(fields)]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self,
                                         fname,
                                         engine='hdf5',
                                         start_date=None,
                                         finish_date=None,
                                         db_server='127.0.0.1'):
        """
        read_time_series_cache_from_disk - Reads a time series cache from disk (HDF5, bcolz or Arctic)

        Parameters
        ----------
        fname : str
            file to be read from

        Returns
        -------
        DataFrame
        """

        if (engine == 'bcolz'):
            try:
                name = self.get_bcolz_filename(fname)
                zlens = bcolz.open(rootdir=name)
                data_frame = zlens.todataframe()

                data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                data_frame.index.name = 'Date'
                del data_frame['DTS_']

                # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                data_frame.columns = self.find_replace_chars(
                    data_frame.columns, _replace_chars, _invalid_chars)
                data_frame.columns = [x[2:] for x in data_frame.columns]

                return data_frame
            except:
                return None
        elif (engine == 'arctic'):
            socketTimeoutMS = 2 * 1000

            import pymongo
            from arctic import Arctic

            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            c = pymongo.MongoClient(db_server, connect=False)

            store = Arctic(c,
                           socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS)

            # Access the library
            library = store[fname]

            if start_date is None and finish_date is None:
                item = library.read(fname)
            else:
                from arctic.date import DateRange
                item = library.read(fname,
                                    date_range=DateRange(
                                        start_date, finish_date))

            c.close()

            self.logger.info('Read ' + fname)

            return item.data
        elif os.path.isfile(self.get_h5_filename(fname)):
            store = pandas.HDFStore(self.get_h5_filename(fname))
            data_frame = store.select("data")

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.close()

            return data_frame

        return None

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self,
                            f_name,
                            freq,
                            cutoff=None,
                            dateparse=None,
                            postfix='.close',
                            intraday_tz='UTC',
                            excel_sheet=None):
        """
        read_csv_data_frame - Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each column
        intraday_tz : str (optional)
            timezone of file if uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if (freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(
                    int,
                    [x[6:10], x[3:5], x[0:2], x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(
                    int,
                    [x[0:4], x[5:7], x[8:10], x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name,
                                             index_col=0,
                                             parse_dates=True,
                                             date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name,
                                               excel_sheet,
                                               index_col=0,
                                               na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion (convert_objects was removed from pandas,
                # so coerce object columns to datetimes explicitly instead)
                for c in data_frame.columns[data_frame.dtypes == object]:
                    data_frame[c] = pandas.to_datetime(data_frame[c],
                                                       errors='coerce')

            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name,
                                                     index_col=0,
                                                     parse_dates=["DATE"],
                                                     date_parser=dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name,
                                                     index_col=0,
                                                     parse_dates=["Date"],
                                                     date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name,
                                                   excel_sheet,
                                                   index_col=0,
                                                   na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                        int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if (freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):

        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self,
                               f_name,
                               category,
                               freq,
                               cutoff=None,
                               dateparse=None):
        """
        convert_csv_data_frame - Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name,
                                              freq,
                                              cutoff=cutoff,
                                              dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """
        clean_csv_file - Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
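
A hedged round-trip sketch using the cache helpers defined above (the /tmp path and sample frame are illustrative):

# Write a small frame to fixed-format HDF5, then read it back
import pandas

io_engine = IOEngine()

df = pandas.DataFrame({'EURUSD.close': [1.10, 1.11]},
                      index=pandas.date_range('2020-01-01', periods=2))

io_engine.write_time_series_cache_to_disk('/tmp/eurusd_daily', df,
                                          engine='hdf5_fixed')

df_cached = io_engine.read_time_series_cache_from_disk('/tmp/eurusd_daily',
                                                       engine='hdf5')
print(df_cached)
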
Code Example #8
File: filter.py (project: pkan0583/findatapy)
class Filter(object):

    _time_series_cache = {}  # shared across all instances of object!

    def __init__(self):
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        return

    def filter_time_series(self,
                           market_data_request,
                           data_frame,
                           pad_columns=False):
        """
        filter_time_series - Filters a time series given a set of criteria (like start/finish date and tickers)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            defining time series filtering
        data_frame : DataFrame
            time series to be filtered
        pad_columns : boolean
            if True, pad non-existent columns with NaN

        Returns
        -------
        DataFrame
        """
        start_date = market_data_request.start_date
        finish_date = market_data_request.finish_date

        data_frame = self.filter_time_series_by_date(start_date, finish_date,
                                                     data_frame)

        # filter by ticker.field combinations requested
        columns = self.create_tickers_fields_list(market_data_request)

        if (pad_columns):
            data_frame = self.pad_time_series_columns(columns, data_frame)
        else:
            data_frame = self.filter_time_series_by_columns(
                columns, data_frame)

        return data_frame

    def create_calendar_bus_days(self, start_date, end_date, cal='FX'):
        """
        create_calendar_bus_days - Creates a calendar of business days

        Parameters
        ----------
        start_date : DateTime
            start date of calendar
        end_date : DateTime
            finish date of calendar
        cal : str
            business calendar to use

        Returns
        -------
        list
        """
        hols = self.get_holidays(start_date, end_date, cal)
        index = pandas.bdate_range(start=start_date, end=end_date, freq='D')

        return [x for x in index if x not in hols]

    def get_holidays(self, start_date, end_date, cal='FX'):
        """
        get_holidays - Gets the holidays for a given calendar

        Parameters
        ----------
        start_date : DateTime
            start date of calendar
        end_date : DateTime
            finish date of calendar
        cal : str
            business calendar to use

        Returns
        -------
        list
        """

        # TODO use Pandas CustomBusinessDays to get more calendars
        holidays_list = []

        if cal == 'FX':
            # filter for Christmas & New Year's Day
            for i in range(1970, 2020):
                holidays_list.append(str(i) + "-12-25")
                holidays_list.append(str(i) + "-01-01")

        if cal == 'WEEKDAY':
            # treat weekends as the "holidays": a weekmask of 'Sat Sun' makes
            # the custom business day fall only on Saturdays and Sundays
            bday = CustomBusinessDay(weekmask='Sat Sun')

            holidays_list = pandas.date_range(start_date, end_date, freq=bday)

        # holidays_list = pandas.to_datetime(holidays_list).order()
        holidays_list = pandas.to_datetime(holidays_list).sort_values()

        # floor start date
        start = np.datetime64(start_date) - np.timedelta64(1, 'D')

        # ceiling end date
        end = np.datetime64(end_date) + np.timedelta64(1, 'D')

        holidays_list = [x for x in holidays_list if x >= start and x <= end]

        return pandas.to_datetime(holidays_list)

    def filter_time_series_by_holidays(self, data_frame, cal='FX'):
        """
        filter_time_series_by_holidays - Removes holidays from a given time series

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be filtered
        cal : str
            business calendar to use

        Returns
        -------
        DataFrame
        """

        # optimal case for weekdays: remove Saturday and Sunday
        if (cal == 'WEEKDAY'):
            return data_frame.loc[data_frame.index.dayofweek <= 4]

        # select only those holidays in the sample
        holidays_start = self.get_holidays(data_frame.index[0],
                                           data_frame.index[-1], cal)

        if (holidays_start.size == 0):
            return data_frame

        holidays_end = holidays_start + np.timedelta64(1, 'D')

        # floored_dates = data_frame.index.normalize()
        #
        # filter_by_index_start = floored_dates.searchsorted(holidays_start)
        # filter_by_index_end = floored_dates.searchsorted(holidays_end)
        #
        # indices_to_keep = []
        #
        # if filter_by_index_end[0] == 0:
        #     counter = filter_by_index_end[0] + 1
        #     start_index = 1
        # else:
        #     counter = 0
        #     start_index = 0
        #
        # for i in range(start_index, len(holidays_start)):
        #     indices = list(range(counter, filter_by_index_start[i] - 1))
        #     indices_to_keep = indices_to_keep + indices
        #
        #     counter = filter_by_index_end[i] + 1
        #
        # indices = list(range(counter, len(floored_dates)))
        # indices_to_keep = indices_to_keep + indices
        #
        # data_frame_filtered = data_frame.ix[indices_to_keep]

        data_frame_left = data_frame
        data_frame_filtered = []

        for i in range(0, len(holidays_start)):
            data_frame_temp = data_frame_left.loc[
                data_frame_left.index < holidays_start[i]]
            data_frame_left = data_frame_left.loc[
                data_frame_left.index >= holidays_end[i]]

            data_frame_filtered.append(data_frame_temp)

        data_frame_filtered.append(data_frame_left)

        return pandas.concat(data_frame_filtered)

    def filter_time_series_by_date(self, start_date, finish_date, data_frame):
        """
        filter_time_series_by_date - Filter time series by start/finish dates

        Parameters
        ----------
        start_date : DateTime
            start date of calendar
        finish_date : DateTime
            finish date of calendar
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        offset = 0  # inclusive

        return self.filter_time_series_by_date_offset(start_date, finish_date,
                                                      data_frame, offset)

    def filter_time_series_by_days(self, days, data_frame):
        """
        filter_time_series_by_days - Filter time series to the most recent number of days

        Parameters
        ----------
        days : int
            number of days back from now (UTC) to keep
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        offset = 0  # inclusive

        finish_date = datetime.datetime.utcnow()
        start_date = finish_date - timedelta(days=days)
        return self.filter_time_series_by_date_offset(start_date, finish_date,
                                                      data_frame, offset)

    def filter_time_series_by_date_exc(self, start_date, finish_date,
                                       data_frame):
        """
        filter_time_series_by_date_exc - Filter time series by start/finish dates (exclude start & finish dates)

        Parameters
        ----------
        start_date : DateTime
            start date of calendar
        finish_date : DateTime
            finish date of calendar
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        offset = 1  # exclusive of start finish date

        return self.filter_time_series_by_date_offset(start_date, finish_date,
                                                      data_frame, offset)

        # try:
        #     # filter by dates for intraday data
        #     if(start_date is not None):
        #         data_frame = data_frame.loc[start_date <= data_frame.index]
        #
        #     if(finish_date is not None):
        #         # filter by start_date and finish_date
        #         data_frame = data_frame.loc[data_frame.index <= finish_date]
        # except:
        #     # filter by dates for daily data
        #     if(start_date is not None):
        #         data_frame = data_frame.loc[start_date.date() <= data_frame.index]
        #
        #     if(finish_date is not None):
        #         # filter by start_date and finish_date
        #         data_frame = data_frame.loc[data_frame.index <= finish_date.date()]
        #
        # return data_frame

    def filter_time_series_by_date_offset(self, start_date, finish_date,
                                          data_frame, offset):
        """
        filter_time_series_by_date_offset - Filter time series by start/finish dates (and an offset)

        Parameters
        ----------
        start_date : DateTime
            start date of calendar
        finish_date : DateTime
            finish date of calendar
        data_frame : DataFrame
            data frame to be filtered
        offset : int
            offset to be applied

        Returns
        -------
        DataFrame
        """
        try:
            data_frame = self.filter_time_series_aux(start_date, finish_date,
                                                     data_frame, offset)
        except:
            # start_date = start_date.date()
            # finish_date = finish_date.date()
            # if isinstance(start_date, str):
            #     # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M'
            #     try:
            #         start_date = datetime.datetime.strptime(start_date, '%b %d %Y %H:%M')
            #     except:
            #         i = 0
            #
            # if isinstance(finish_date, str):
            #     # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M'
            #     try:
            #         finish_date = datetime.datetime.strptime(finish_date, '%b %d %Y %H:%M')
            #     except:
            #         i = 0

            try:
                start_date = start_date.date()
            except:
                pass

            try:
                finish_date = finish_date.date()
            except:
                pass

            # if we have dates stored as opposed to TimeStamps (ie. daily data), we use a simple (slower) method
            # for filtering daily data
            if (start_date is not None):
                data_frame = data_frame.loc[start_date < data_frame.index]

            if (finish_date is not None):
                # filter by start_date and finish_date
                data_frame = data_frame.loc[data_frame.index < finish_date]

        return data_frame

    def filter_time_series_aux(self, start_date, finish_date, data_frame,
                               offset):
        """
        filter_time_series_aux - Filter time series by start/finish dates (and an offset)

        Parameters
        ----------
        start_date : DateTime
            start date of calendar
        finish_date : DateTime
            finish date of calendar
        data_frame : DataFrame
            data frame to be filtered
        offset : int
            offset to be applied

        Returns
        -------
        DataFrame
        """
        start_index = 0
        finish_index = len(data_frame.index) - offset

        # filter by dates for intraday data
        if (start_date is not None):
            start_index = data_frame.index.searchsorted(start_date)

            if (0 <= start_index + offset < len(data_frame.index)):
                start_index = start_index + offset

                # data_frame = data_frame.ix[start_date < data_frame.index]

        if (finish_date is not None):
            finish_index = data_frame.index.searchsorted(finish_date)

            if (0 <= finish_index - offset < len(data_frame.index)):
                finish_index = finish_index - offset

                # data_frame = data_frame[data_frame.index < finish_date]

        return data_frame.iloc[start_index:finish_index]
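        # Hedged note on the offset convention (illustrative, not from the
        # original source): searchsorted locates the index positions of
        # start_date and finish_date; with offset=0 those boundary positions
        # are used as-is (the "inclusive" filters above), while offset=1 moves
        # the start position forward and the finish position back by one row,
        # so both boundary dates are excluded (filter_time_series_by_date_exc).
        #
        #   start, finish = pandas.Timestamp('2020-01-01'), pandas.Timestamp('2020-06-30')
        #   inc = fltr.filter_time_series_by_date(start, finish, df)
        #   exc = fltr.filter_time_series_by_date_exc(start, finish, df)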

    def filter_time_series_by_time_of_day(self,
                                          hour,
                                          minute,
                                          data_frame,
                                          in_tz=None,
                                          out_tz=None):
        """
        filter_time_series_by_time_of_day - Filter time series by time of day

        Parameters
        ----------
        hour : int
            hour of day
        minute : int
            minute of day
        data_frame : DataFrame
            data frame to be filtered
        in_tz : str (optional)
            time zone of input data frame
        out_tz : str (optional)
            time zone of output data frame

        Returns
        -------
        DataFrame
        """
        if out_tz is not None:
            if in_tz is not None:
                data_frame = data_frame.tz_localize(pytz.timezone(in_tz))

            data_frame = data_frame.tz_convert(pytz.timezone(out_tz))

            # change internal representation of time
            data_frame.index = pandas.DatetimeIndex(data_frame.index.values)

        data_frame = data_frame[data_frame.index.minute == minute]
        data_frame = data_frame[data_frame.index.hour == hour]

        return data_frame
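        # Hedged usage sketch (fltr and df are illustrative names): keep only
        # the 10:00 New York observation of an intraday series stored in GMT:
        #
        #   ny_open = fltr.filter_time_series_by_time_of_day(
        #       10, 0, df, in_tz='GMT', out_tz='America/New_York')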

    def filter_time_series_by_minute_of_hour(self,
                                             minute,
                                             data_frame,
                                             in_tz=None,
                                             out_tz=None):
        """
        filter_time_series_by_minute_of_hour - Filter time series by minute of hour

        Parameters
        ----------
        minute : int
            minute of hour
        data_frame : DataFrame
            data frame to be filtered
        in_tz : str (optional)
            time zone of input data frame
        out_tz : str (optional)
            time zone of output data frame

        Returns
        -------
        DataFrame
        """
        if out_tz is not None:
            if in_tz is not None:
                data_frame = data_frame.tz_localize(pytz.timezone(in_tz))

            data_frame = data_frame.tz_convert(pytz.timezone(out_tz))

            # change internal representation of time
            data_frame.index = pandas.DatetimeIndex(data_frame.index.values)

        data_frame = data_frame[data_frame.index.minute == minute]

        return data_frame

    def filter_time_series_between_hours(self, start_hour, finish_hour,
                                         data_frame):
        """
        filter_time_series_between_hours - Filter time series between hours of the day

        Parameters
        ----------
        start_hour : int
            first hour of the day to keep (inclusive)
        finish_hour : int
            last hour of the day to keep (inclusive)
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """

        data_frame = data_frame[data_frame.index.hour <= finish_hour]
        data_frame = data_frame[data_frame.index.hour >= start_hour]

        return data_frame

    def filter_time_series_by_columns(self, columns, data_frame):
        """
        filter_time_series_by_columns - Filter time series by certain columns

        Parameters
        ----------
        columns : list(str)
            list of columns to retain
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        return data_frame[columns]

    def pad_time_series_columns(self, columns, data_frame):
        """
        pad_time_series_columns - Selects the given columns from a DataFrame, creating empty (NaN) columns where missing

        Parameters
        ----------
        columns : list(str)
            columns to select; any missing columns are padded with NaN
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        old_columns = data_frame.columns

        common_columns = [val for val in columns if val in old_columns]
        uncommon_columns = [val for val in columns if val not in old_columns]

        data_frame = data_frame[common_columns]

        if uncommon_columns != []:
            self.logger.info("Padding missing columns " +
                             str(uncommon_columns))

        for x in uncommon_columns:
            data_frame.loc[:, x] = np.nan

        return data_frame
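        # Hedged usage sketch (fltr and df are illustrative names): if df only
        # holds 'EURUSD.close', the call below returns both columns, with the
        # missing 'GBPUSD.close' column padded with NaN:
        #
        #   padded = fltr.pad_time_series_columns(['EURUSD.close', 'GBPUSD.close'], df)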

    def filter_time_series_by_excluded_keyword(self, keyword, data_frame):
        """
        filter_time_series_by_excluded_keyword - Filter time series to exclude columns which contain keyword

        Parameters
        ----------
        keyword : str
            columns to be excluded with this keyword
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        columns = [elem for elem in data_frame.columns if keyword not in elem]

        return self.filter_time_series_by_columns(columns, data_frame)

    def filter_time_series_by_included_keyword(self, keyword, data_frame):
        """
        filter_time_series_by_included_keyword - Filter time series to include columns which contain keyword

        Parameters
        ----------
        keyword : str
            columns to be included with this keyword
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        columns = [elem for elem in data_frame.columns if keyword in elem]

        return self.filter_time_series_by_columns(columns, data_frame)

    def filter_time_series_by_minute_freq(self, freq, data_frame):
        """
        filter_time_series_by_minute_freq - Filter time series to keep only timestamps whose minute is a multiple of the given frequency

        Parameters
        ----------
        freq : int
            minute frequency to be filtered
        data_frame : DataFrame
            data frame to be filtered

        Returns
        -------
        DataFrame
        """
        return data_frame.loc[data_frame.index.minute % freq == 0]

    def create_tickers_fields_list(self, market_data_request):
        """
        create_tickers_fields_list - Creates a list of tickers concatenated with fields from a MarketDataRequest

        Parameters
        ----------
        market_data_request : MarketDataRequest
            request to be expanded

        Returns
        -------
        list(str)
        """
        tickers = market_data_request.tickers
        fields = market_data_request.fields

        if isinstance(tickers, str): tickers = [tickers]
        if isinstance(fields, str): fields = [fields]

        tickers_fields_list = []

        # create ticker.field combination for series we wish to return
        for f in fields:
            for t in tickers:
                tickers_fields_list.append(t + '.' + f)

        return tickers_fields_list
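        # Hedged illustration: for a MarketDataRequest with
        # tickers=['EURUSD', 'GBPUSD'] and fields=['close'], this returns
        # ['EURUSD.close', 'GBPUSD.close'].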

    def resample_time_series(self, data_frame, freq):
        return data_frame.asfreq(freq, method='pad')

    def resample_time_series_frequency(self,
                                       data_frame,
                                       data_resample_freq,
                                       data_resample_type='mean',
                                       fill_empties=False):
        # should we take the mean, first, last in our resample
        if data_resample_type == 'mean':
            data_frame_r = data_frame.resample(data_resample_freq).mean()
        elif data_resample_type == 'first':
            data_frame_r = data_frame.resample(data_resample_freq).first()
        elif data_resample_type == 'last':
            data_frame_r = data_frame.resample(data_resample_freq).last()
        else:
            # TODO implement other types
            return

        if fill_empties == True:
            data_frame, data_frame_r = data_frame.align(data_frame_r,
                                                        join='left',
                                                        axis=0)
            data_frame_r = data_frame_r.fillna(method='ffill')

        return data_frame_r
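        # Hedged usage sketch (fltr and df are illustrative names): downsample
        # a 1-minute series to hourly means, then forward fill back onto the
        # original index:
        #
        #   hourly = fltr.resample_time_series_frequency(
        #       df, '60min', data_resample_type='mean', fill_empties=True)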

    def make_FX_1_min_working_days(self, data_frame):
        data_frame = data_frame.resample('1min').mean()
        data_frame = self.filter_time_series_by_holidays(data_frame, 'FX')
        data_frame = data_frame.fillna(method='ffill')
        data_frame = self.remove_out_FX_out_of_hours(data_frame)

        return data_frame

    def remove_out_FX_out_of_hours(self, data_frame):
        """
        remove_out_FX_out_of_hours - Filters a time series for FX hours (ie. excludes 22h GMT Fri - 19h GMT Sun)

        Parameters
        ----------
        data_frame : DataFrame
            data frame with FX prices

        Returns
        -------
        DataFrame
        """
        # assume data_frame is in GMT time
        # remove Fri after 22:00 GMT
        # remove Sat
        # remove Sun before 19:00 GMT

        # Monday = 0, ..., Sunday = 6
        data_frame = data_frame.loc[~((data_frame.index.dayofweek == 4) &
                                      (data_frame.index.hour > 22))]
        data_frame = data_frame.loc[~(data_frame.index.dayofweek == 5)]
        data_frame = data_frame.loc[~((data_frame.index.dayofweek == 6) &
                                      (data_frame.index.hour < 19))]

        return data_frame
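    # Hedged end-to-end sketch: these methods appear to belong to findatapy's
    # Filter class; the import path below is an assumption and the data is
    # illustrative.
    #
    #   import pandas
    #   from findatapy.timeseries import Filter
    #
    #   fltr = Filter()
    #   df = pandas.DataFrame({'EURUSD.close': range(260)},
    #                         index=pandas.bdate_range('2020-01-01', periods=260))
    #   df = fltr.filter_time_series_by_date(pandas.Timestamp('2020-02-03'),
    #                                        pandas.Timestamp('2020-11-30'), df)
    #   df = fltr.filter_time_series_by_included_keyword('EURUSD', df)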
Code example #9
0
File: fxconv.py Project: yashyennam/findatapy
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        return
Code example #10
0
class BBGLowLevelDaily(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    def combine_slices(self, data_frame_cols, data_frame_slice):
        # data
        try:
            if (data_frame_slice.columns.get_level_values(1).values[0]
                    not in data_frame_cols):
                # return data_frame.join(data_frame_slice, how="outer")
                return data_frame_slice
        except Exception as e:
            self.logger.warn('Data slice empty ' + str(e))

            return None

        return None

    # populate options for the Bloomberg daily data request
    def fill_options(self, market_data_request):
        options = OptionsBBG()

        options.security = market_data_request.tickers
        options.startDateTime = market_data_request.start_date
        options.endDateTime = market_data_request.finish_date
        options.fields = market_data_request.fields

        options.overrides = market_data_request.overrides

        return options

    def process_message(self, msg):
        # Process received events

        # SLOW loop (careful: not all fields are returned every time, hence the need to include the field name in the tuple)
        # perhaps try to run in parallel?

        implementation = 'simple'

        if implementation == 'simple':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            data = defaultdict(dict)
            # FASTER: avoid repeated getValue/getElement calls into blpapi (very slow); cache intermediate variables instead
            for i in range(fieldData.numValues()):
                mini_field_data = fieldData.getValue(i)
                date = mini_field_data.getElement(0).getValue()

                for j in range(1, mini_field_data.numElements()):
                    field_value = mini_field_data.getElement(j)

                    data[(str(field_value.name()),
                          ticker)][date] = field_value.getValue()

            # ORIGINAL repeated calling getValue/getElement much slower
            # for i in range(fieldData.numValues()):
            #     for j in range(1, fieldData.getValue(i).numElements()):
            #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
            #             = fieldData.getValue(i).getElement(j).getValue()
        elif implementation == 'py4j':
            pass

            # TODO Py4J
            # from findatapy.market.bbgloop import bbgloop
            # from py4j.java_gateway import JavaGateway

            # gateway = JavaGateway()
            # data = gateway.entry_point.parseFieldDataArray(msg)
        elif implementation == 'cython':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop import bbgloop

            data = bbgloop(fieldData, ticker)
        elif implementation == 'numba':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop_numba import bbgloop_numba

            data = bbgloop_numba(fieldData, ticker)
            # TODO cython

        data_frame = pandas.DataFrame(data)

        # an obsolete ticker could return no values
        if (not (data_frame.empty)):
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' +
                             str(data_frame.index[0]) + ' - ' +
                             str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue, options, cid):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", options.endDateTime.strftime('%Y%m%d'))

        # append the requested fields and securities
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
Code example #11
0
    def __init__(self):
        super(DataVendorBBG, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
Code example #12
0
class DataVendorBBG(DataVendor):
    """Abstract class for download of Bloomberg daily, intraday data and reference data.

    Implemented by:
        DataVendorBBGOpen - Adapted version of new Bloomberg Open API for Python which is recommended. Note that this
        requires compilation, via installed C++ compiler. For Python 3.5, this is Microsoft Visual Studio 2015.

        Or it is easier to install blpapi via conda

        Note: no longer supports COM API, which is slower and only 32 bit

    """

    # these fields are BDS style fields to be downloaded using Bloomberg's Reference Data interface
    list_of_ref_fields = [
        'release-date-time-full', 'last-tradeable-day',
        'futures-chain-tickers', 'futures-chain-last-trade-dates',
        'first-notice-date', 'first-tradeable-day', 'cal-non-settle-dates'
    ]

    list_of_ref_vendor_fields = [
        'ECO_FUTURE_RELEASE_DATE_LIST', 'LAST_TRADEABLE_DT', 'FUT_CHAIN',
        'FUT_CHAIN_LAST_TRADE_DATES', 'FUT_NOTICE_FIRST', 'FUT_FIRST_TRADE_DT',
        'CALENDAR_NON_SETTLEMENT_DATES'
    ]

    def __init__(self):
        super(DataVendorBBG, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    # implement method in abstract superclass
    def load_ticker(self, market_data_request):
        """Retrieves market data from external data source (in this case Bloomberg)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains all the various parameters detailing time series start and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        market_data_request = MarketDataRequest(md_request=market_data_request)
        market_data_request_vendor = self.construct_vendor_market_data_request(
            market_data_request)

        data_frame = None
        self.logger.info("Request Bloomberg data")

        # do we need daily or intraday data?
        if (market_data_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # work out the fields which need to be downloaded via Bloomberg ref request (BDP) and
            # those that can be downloaded via Historical request (BDH)
            ref_fields = []
            ref_vendor_fields = []

            for i in range(0, len(market_data_request.fields)):
                if market_data_request.fields[i] in self.list_of_ref_fields \
                        or market_data_request_vendor.fields[i] in self.list_of_ref_vendor_fields:
                    ref_fields.append(market_data_request.fields[i])
                    ref_vendor_fields.append(
                        market_data_request_vendor.fields[i])

            non_ref_fields = []
            non_ref_vendor_fields = []

            for i in range(0, len(market_data_request.fields)):
                if market_data_request.fields[i] not in self.list_of_ref_fields \
                        and market_data_request_vendor.fields[i] not in self.list_of_ref_vendor_fields:
                    non_ref_fields.append(market_data_request.fields[i])
                    non_ref_vendor_fields.append(
                        market_data_request_vendor.fields[i])

            # for certain cases, need to use ReferenceDataRequest
            # eg. for events times/dates, last tradeable date fields (when specified)
            if len(ref_fields) > 0:

                # careful: make sure you copy the market data request object (when threading, altering that can
                # cause concurrency issues!)
                old_fields = copy.deepcopy(market_data_request.fields)
                old_vendor_fields = copy.deepcopy(
                    market_data_request_vendor.fields)

                # market_data_request = MarketDataRequest(md_request=market_data_request_copy)

                market_data_request.fields = ref_fields
                market_data_request.vendor_fields = ref_vendor_fields
                market_data_request_vendor = self.construct_vendor_market_data_request(
                    market_data_request)

                # just select those reference fields to download via reference
                datetime_data_frame = self.get_reference_data(
                    market_data_request_vendor, market_data_request)

                # download all the other event or non-ref fields (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(non_ref_fields) > 0:

                    market_data_request.fields = non_ref_fields
                    market_data_request.vendor_fields = non_ref_vendor_fields
                    market_data_request_vendor = self.construct_vendor_market_data_request(
                        market_data_request)

                    events_data_frame = self.get_daily_data(
                        market_data_request, market_data_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pandas.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                market_data_request.fields = copy.deepcopy(old_fields)
                market_data_request_vendor.fields = copy.deepcopy(
                    old_vendor_fields)

            # for all other daily/monthly/quarter data, we can use HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(market_data_request,
                                                 market_data_request_vendor)

                try:
                    # convert fields with release-dt to dates (special case!)
                    for c in data_frame.columns:
                        if 'release-dt' in c:
                            data_frame[c] = (
                                data_frame[c]).astype('int').astype(str).apply(
                                    lambda x: pandas.to_datetime(
                                        x, format='%Y%m%d'))
                except:
                    pass

        # assume one ticker only for intraday data and use IntradayDataRequest to Bloomberg
        if (market_data_request.freq
                in ['tick', 'intraday', 'second', 'minute', 'hourly']):
            market_data_request_vendor.tickers = market_data_request_vendor.tickers[
                0]

            if market_data_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(market_data_request_vendor)
            else:
                data_frame = self.download_intraday(market_data_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        self.logger.info("No tickers returned for: " +
                                         market_data_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = market_data_request.tickers[0] + "." + cols
                data_frame.columns = cols

        self.logger.info("Completed request from Bloomberg.")

        return data_frame

    def get_daily_data(self, market_data_request, market_data_request_vendor):
        data_frame = self.download_daily(market_data_request_vendor)

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                self.logger.info("No tickers returned for...")

                try:
                    self.logger.info(str(market_data_request_vendor.tickers))
                except:
                    pass

                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

            # TODO if empty try downloading again a year later
            try:
                fields = self.translate_from_vendor_field(
                    returned_fields, market_data_request)
            except:
                self.logger.warning('Could not translate vendor fields')

            tickers = self.translate_from_vendor_ticker(
                returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined
            data_frame.index.name = 'Date'

        return data_frame

    def get_reference_data(self, market_data_request_vendor,
                           market_data_request):
        end = datetime.utcnow()

        from datetime import timedelta

        # because very often we may wish to download data about future calendar events
        end = end + timedelta(days=365)
        #  end.replace(year = end.year + 1)

        market_data_request_vendor.finish_date = end

        self.logger.debug("Requesting ref for " +
                          market_data_request_vendor.tickers[0] + " etc.")

        data_frame = self.download_ref(market_data_request_vendor)

        self.logger.debug("Waiting for ref...")

        # convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      market_data_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, market_data_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # need to convert numerical and datetime columns separately post pandas 0.23
            data_frame = data_frame.apply(pandas.to_numeric, errors='ignore')
            data_frame = data_frame.apply(pandas.to_datetime, errors='ignore')

            # TODO coerce will be deprecated from pandas 0.23.0 onwards, so remove!
            # data_frame = data_frame.convert_objects(convert_dates = 'coerce', convert_numeric= 'coerce')

        return data_frame

    # implement method in abstract superclass
    @abc.abstractmethod
    def kill_session(self):
        return

    @abc.abstractmethod
    def download_tick(self, market_data_request):
        return

    @abc.abstractmethod
    def download_intraday(self, market_data_request):
        return

    @abc.abstractmethod
    def download_daily(self, market_data_request):
        return

    @abc.abstractmethod
    def download_ref(self, market_data_request):
        return
Code example #13
0
class BacktestRequest(MarketDataRequest):
    """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc

    Used by TradingModel and Backtest to construct backtested returns for trading strategies

    """

    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument
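    # Hedged usage sketch for the properties above (values are illustrative;
    # assumes TechParams is a simple attribute holder, as used elsewhere in
    # finmarketpy):
    #
    #   br = BacktestRequest()
    #   br.spot_tc_bp = 0.5      # quoted in bp; stored as 0.5 / (2 * 100 * 100) = 2.5e-05
    #   br.asset = 'fx'
    #   br.instrument = 'spot'
    #   br.tech_params.sma_period = 20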
Code example #14
0
    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()
Code example #15
0
class TradeAnalysis(object):
    """Applies some basic trade analysis for a trading strategy (as defined by TradingModel). Use PyFolio to create some
    basic trading statistics. Also allows you test multiple parameters for a specific strategy (like TC).

    """

    def __init__(self, engine = ChartConstants().chartfactory_default_engine):
        self.logger = LoggerManager().getLogger(__name__)
        self.DUMP_PATH = 'output_data/' + datetime.date.today().strftime("%Y%m%d") + ' '
        self.SCALE_FACTOR = 3
        self.DEFAULT_PLOT_ENGINE = engine
        self.chart = Chart(engine=self.DEFAULT_PLOT_ENGINE)

        return

    def run_strategy_returns_stats(self, trading_model, index = None, engine = 'pyfolio'):
        """Plots useful statistics for the trading strategy (using PyFolio)

        Parameters
        ----------
        trading_model : TradingModel
            defining trading strategy
        index : DataFrame
            defines the strategy by a time series (optional; if None, the model's own P&L is used)

        """

        if index is None:
            pnl = trading_model.get_strategy_pnl()
        else:
            pnl = index

        tz = Timezone()
        calculations = Calculations()

        if engine == 'pyfolio':
            # PyFolio assumes UTC time based DataFrames (so force this localisation)
            try:
                pnl = tz.localise_index_as_UTC(pnl)
            except: pass

            # set the matplotlib style sheet & defaults
            # at present this only works in Matplotlib engine
            try:
                matplotlib.rcdefaults()
                plt.style.use(ChartConstants().chartfactory_style_sheet['chartpy-pyfolio'])
            except: pass

            # TODO for intraday strategies, make daily

            # convert DataFrame (assumed to have only one column) to Series
            pnl = calculations.calculate_returns(pnl)
            pnl = pnl.dropna()
            pnl = pnl[pnl.columns[0]]
            fig = pf.create_returns_tear_sheet(pnl, return_fig=True)

            try:
                plt.savefig(trading_model.DUMP_PATH + "stats.png")
            except: pass

            plt.show()
        elif engine == 'finmarketpy':

            # assume we have a TradingModel
            # TODO: also allow passing in a time series
            from chartpy import Canvas, Chart
            pnl = trading_model.plot_strategy_pnl(silent_plot=True)                         # plot the final strategy
            individual = trading_model.plot_strategy_group_pnl_trades(silent_plot=True)     # plot the individual trade P&Ls

            pnl_comp = trading_model.plot_strategy_group_benchmark_pnl(silent_plot=True)    # plot all the cumulative P&Ls of each component
            ir_comp = trading_model.plot_strategy_group_benchmark_pnl_ir(silent_plot=True)  # plot all the IR of each component

            leverage = trading_model.plot_strategy_leverage(silent_plot=True)               # plot the leverage of the portfolio
            ind_lev = trading_model.plot_strategy_group_leverage(silent_plot=True)          # plot all the individual leverages

            canvas = Canvas([[pnl, individual],
                             [pnl_comp, ir_comp],
                             [leverage, ind_lev]]
                             )

            canvas.generate_canvas(silent_display=False, canvas_plotter='plain')

    def run_excel_trade_report(self, trading_model, excel_file = 'model.xlsx'):
        """
        run_excel_trade_report - Creates an Excel spreadsheet with model returns and latest trades

        Parameters
        ----------
        trading_model : TradingModel
            defining trading strategy (can be a list)

        """

        trading_model_list = trading_model

        if not(isinstance(trading_model_list, list)):
            trading_model_list = [trading_model]

        writer = pandas.ExcelWriter(excel_file, engine='xlsxwriter')

        for tm in trading_model_list:
            strategy_name = tm.FINAL_STRATEGY
            returns = tm.get_strategy_group_benchmark_pnl()

            returns.to_excel(writer, sheet_name=strategy_name + ' rets', engine='xlsxwriter')

            # write raw position/trade sizes
            self.save_positions_trades(tm, tm.get_strategy_signal(),tm.get_strategy_trade(),
                                       'pos', 'trades', writer)

            if hasattr(tm, '_strategy_signal_notional'):
                # write position/trade sizes scaled by notional
                self.save_positions_trades(tm,
                                           tm.get_strategy_signal_notional(),
                                           tm.get_strategy_trade_notional(), 'pos - Not', 'trades - Not', writer)

            if hasattr(tm, '_strategy_signal_contracts'):
                # write position/trade sizes in terms of contract sizes
                self.save_positions_trades(tm,
                                           tm.get_strategy_signal_contracts(),
                                           tm.get_strategy_trade_contracts(), 'pos - Cont', 'trades - Cont', writer)

        # TODO Add summary sheet comparing return statistics for all the different models in the list

        writer.save()
        writer.close()

    def save_positions_trades(self, tm, signals, trades, signal_caption, trade_caption, writer):
        signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' hist ' + signal_caption, engine='xlsxwriter')

        strip = None

        if hasattr(tm, 'STRIP'):
            strip = tm.STRIP

        recent_signals = tm.grab_signals(signals, date=[-1, -2, -5, -10, -20], strip=strip)
        recent_trades = tm.grab_signals(trades, date=[-1, -2, -5, -10, -20], strip=strip)

        recent_signals.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' ' + signal_caption, engine='xlsxwriter')
        recent_trades.to_excel(writer, sheet_name=tm.FINAL_STRATEGY + ' ' + trade_caption, engine='xlsxwriter')

    def run_tc_shock(self, strategy, tc = None):
        if tc is None: tc = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0]

        parameter_list = [{'spot_tc_bp' : x } for x in tc]
        pretty_portfolio_names = [str(x) + 'bp' for x in tc]    # names of the portfolio
        parameter_type = 'TC analysis'                          # broad type of parameter name

        return self.run_arbitrary_sensitivity(strategy,
                                 parameter_list=parameter_list,
                                 pretty_portfolio_names=pretty_portfolio_names,
                                 parameter_type=parameter_type)
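        # Hedged usage sketch (ta and model are illustrative names):
        #
        #   ta = TradeAnalysis()
        #   tc_sensitivity = ta.run_tc_shock(model, tc=[0, 0.5, 1.0])
        #
        # internally tc=[0, 0.5, 1.0] becomes
        # parameter_list=[{'spot_tc_bp': 0}, {'spot_tc_bp': 0.5}, {'spot_tc_bp': 1.0}]
        # with portfolio names ['0bp', '0.5bp', '1.0bp'].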

    ###### Parameters and signal generation (need to be customised for every model)
    def run_arbitrary_sensitivity(self, trading_model, parameter_list = None, parameter_names = None,
                                  pretty_portfolio_names = None, parameter_type = None):

        asset_df, spot_df, spot_df2, basket_dict = trading_model.load_assets()

        port_list = None
        ret_stats_list = []

        for i in range(0, len(parameter_list)):
            br = trading_model.load_parameters()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            trading_model.br = br   # for calculating signals

            signal_df = trading_model.construct_signal(spot_df, spot_df2, br.tech_params, br)

            backtest = Backtest()
            self.logger.info("Calculating... " + str(pretty_portfolio_names[i]))

            backtest.calculate_trading_PnL(br, asset_df, signal_df)
            ret_stats_list.append(backtest.get_portfolio_pnl_ret_stats())
            stats = str(backtest.get_portfolio_pnl_desc()[0])

            port = backtest.get_cumportfolio().resample('B').mean()
            port.columns = [str(pretty_portfolio_names[i]) + ' ' + stats]

            if port_list is None:
                port_list = port
            else:
                port_list = port_list.join(port)

        # reset the parameters of the strategy
        trading_model.br = trading_model.load_parameters()

        style = Style()

        ir = [t.inforatio()[0] for t in ret_stats_list]

        # if we have too many combinations remove legend and use scaled shaded colour
        # if len(port_list) > 10:
            # style.color = 'Blues'
            # style.display_legend = False

        # plot all the variations
        style.resample = 'B'
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + '.html'
        style.scale_factor = self.SCALE_FACTOR
        style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type

        self.chart.plot(port_list, chart_type='line', style=style)

        # plot all the IR in a bar chart form (can be easier to read!)
        style = Style()
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' ' + parameter_type + ' IR.html'
        style.scale_factor = self.SCALE_FACTOR
        style.title = trading_model.FINAL_STRATEGY + ' ' + parameter_type
        summary = pandas.DataFrame(index = pretty_portfolio_names, data = ir, columns = ['IR'])

        self.chart.plot(summary, chart_type='bar', style=style)

        return port_list

    ###### Parameters and signal generation (need to be customised for every model)
    ###### Plot all the output separately
    def run_arbitrary_sensitivity_separately(self, trading_model, parameter_list = None,
                                             pretty_portfolio_names = None, strip = None):

        # asset_df, spot_df, spot_df2, basket_dict = strat.fill_assets()
        final_strategy = trading_model.FINAL_STRATEGY

        for i in range(0, len(parameter_list)):
            br = trading_model.fill_backtest_request()

            current_parameter = parameter_list[i]

            # for calculating P&L
            for k in current_parameter.keys():
                setattr(br, k, current_parameter[k])

            trading_model.FINAL_STRATEGY = final_strategy + " " + pretty_portfolio_names[i]

            self.logger.info("Calculating... " + pretty_portfolio_names[i])
            trading_model.br = br
            trading_model.construct_strategy(br = br)

            trading_model.plot_strategy_pnl()
            trading_model.plot_strategy_leverage()
            trading_model.plot_strategy_group_benchmark_pnl(strip = strip)

        # reset the parameters of the strategy
        trading_model.br = trading_model.fill_backtest_request()
        trading_model.FINAL_STRATEGY = final_strategy

    def run_day_of_month_analysis(self, trading_model):
        from finmarketpy.economics.seasonality import Seasonality

        calculations = Calculations()
        seas = Seasonality()
        trading_model.construct_strategy()
        pnl = trading_model.get_strategy_pnl()

        # get seasonality by day of the month
        pnl = pnl.resample('B').mean()
        rets = calculations.calculate_returns(pnl)
        bus_day = seas.bus_day_of_month_seasonality(rets, add_average = True)

        # get seasonality by month
        pnl = pnl.resample('BM').mean()
        rets = calculations.calculate_returns(pnl)
        month = seas.monthly_seasonality(rets)

        self.logger.info("About to plot seasonality...")
        style = Style()

        # Plotting spot over day of month/month of year
        style.color = 'Blues'
        style.scale_factor = self.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality day of month.html'
        style.title = trading_model.FINAL_STRATEGY + ' day of month seasonality'
        style.display_legend = False
        style.color_2_series = [bus_day.columns[-1]]
        style.color_2 = ['red'] # red, pink
        style.linewidth_2 = 4
        style.linewidth_2_series = [bus_day.columns[-1]]
        style.y_axis_2_series = [bus_day.columns[-1]]

        self.chart.plot(bus_day, chart_type='line', style=style)

        style = Style()

        style.scale_factor = self.SCALE_FACTOR
        style.file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.png'
        style.html_file_output = self.DUMP_PATH + trading_model.FINAL_STRATEGY + ' seasonality month of year.html'
        style.title = trading_model.FINAL_STRATEGY + ' month of year seasonality'

        self.chart.plot(month, chart_type='line', style=style)

        return month
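    # Hedged usage sketch (model denotes a constructed TradingModel subclass,
    # which is not shown in this snippet):
    #
    #   ta = TradeAnalysis()
    #   ta.run_strategy_returns_stats(model, engine='finmarketpy')
    #   ta.run_excel_trade_report(model, excel_file='model.xlsx')
    #   ta.run_day_of_month_analysis(model)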
Code example #16
0
File: market.py Project: 42Trading/findatapy
class FXCrossFactory(object):
    def __init__(self, market_data_generator=None):
        self.logger = LoggerManager().getLogger(__name__)
        self.fxconv = FXConv()

        self.cache = {}

        self.calculations = Calculations()
        self.market_data_generator = market_data_generator

        return

    def flush_cache(self):
        self.cache = {}

    def get_fx_cross_tick(self,
                          start,
                          end,
                          cross,
                          cut="NYC",
                          source="dukascopy",
                          cache_algo='internet_load_return',
                          type='spot',
                          environment='backtest',
                          fields=['bid', 'ask']):

        if isinstance(cross, str):
            cross = [cross]

        market_data_request = MarketDataRequest(
            gran_freq="tick",
            freq_mult=1,
            freq='tick',
            cut=cut,
            fields=['bid', 'ask', 'bidv', 'askv'],
            cache_algo=cache_algo,
            environment=environment,
            start_date=start,
            finish_date=end,
            data_source=source,
            category='fx')

        market_data_generator = self.market_data_generator
        data_frame_agg = None

        for cr in cross:

            if (type == 'spot'):
                market_data_request.tickers = cr

                cross_vals = market_data_generator.fetch_market_data(
                    market_data_request)

                # if the user only wants 'close', calculate it as the bid/ask mid
                if fields == ['close']:
                    cross_vals = cross_vals[[cr + '.bid',
                                             cr + '.ask']].mean(axis=1).to_frame()
                    cross_vals.columns = [cr + '.close']

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg

    def get_fx_cross(self,
                     start,
                     end,
                     cross,
                     cut="NYC",
                     source="bloomberg",
                     freq="intraday",
                     cache_algo='internet_load_return',
                     type='spot',
                     environment='backtest',
                     fields=['close']):

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            return self.get_fx_cross_tick(start,
                                          end,
                                          cross,
                                          cut=cut,
                                          source=source,
                                          cache_algo=cache_algo,
                                          type='spot',
                                          fields=fields)

        if isinstance(cross, str):
            cross = [cross]

        market_data_request_list = []
        freq_list = []
        type_list = []

        for cr in cross:
            market_data_request = MarketDataRequest(freq_mult=1,
                                                    cut=cut,
                                                    fields=['close'],
                                                    freq=freq,
                                                    cache_algo=cache_algo,
                                                    start_date=start,
                                                    finish_date=end,
                                                    data_source=source,
                                                    environment=environment)

            market_data_request.type = type
            market_data_request.cross = cr

            if freq == 'intraday':
                market_data_request.gran_freq = "minute"  # intraday

            elif freq == 'daily':
                market_data_request.gran_freq = "daily"  # daily

            market_data_request_list.append(market_data_request)

        data_frame_agg = []

        # the nature of the operation determines whether we should use the threading or multiprocessing library
        if DataConstants().market_thread_technique == "thread":
            # most of the time is spent waiting for Bloomberg to return, so threads are sufficient
            from multiprocessing.dummy import Pool
        else:
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = DataConstants().market_thread_no['other']

        if market_data_request_list[0].data_source in DataConstants(
        ).market_thread_no:
            thread_no = DataConstants().market_thread_no[
                market_data_request_list[0].data_source]

        # fudge, issue with multithreading and accessing HDF5 files
        # if self.market_data_generator.__class__.__name__ == 'CachedMarketDataGenerator':
        #    thread_no = 0

        if (thread_no > 0):
            pool = Pool(thread_no)

            # open the market data downloads in their own threads and return the results
            result = pool.map_async(self._get_individual_fx_cross,
                                    market_data_request_list)
            data_frame_agg = self.calculations.iterative_outer_join(
                result.get())

            # data_frame_agg = self.calculations.pandas_outer_join(result.get())

            # pool would have already been closed earlier
            # try:
            #    pool.close()
            #    pool.join()
            # except: pass
        else:
            for md_request in market_data_request_list:
                data_frame_agg.append(
                    self._get_individual_fx_cross(md_request))

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_agg)

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()

        return data_frame_agg

    def _get_individual_fx_cross(self, market_data_request):
        cr = market_data_request.cross
        type = market_data_request.type
        freq = market_data_request.freq

        base = cr[0:3]
        terms = cr[3:6]

        if (type == 'spot'):
            # non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # download base USD cross
                market_data_request.tickers = base_USD
                market_data_request.category = 'fx'

                if base_USD + '.close' in self.cache:
                    base_vals = self.cache[base_USD + '.close']
                else:
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[base_USD + '.close'] = base_vals

                # download terms USD cross
                market_data_request.tickers = terms_USD
                market_data_request.category = 'fx'

                if terms_USD + '.close' in self.cache:
                    terms_vals = self.cache[terms_USD + '.close']
                else:
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                    self.cache[terms_USD + '.close'] = terms_vals

                # if quoted USD/base flip to get USD terms
                if (base_USD[0:3] == 'USD'):
                    if 'USD' + base + '.close' in self.cache:
                        base_vals = self.cache['USD' + base + '.close']
                    else:
                        base_vals = 1 / base_vals
                        self.cache['USD' + base + '.close'] = base_vals

                # if quoted USD/terms flip to get USD terms
                if (terms_USD[0:3] == 'USD'):
                    if 'USD' + terms + '.close' in self.cache:
                        terms_vals = self.cache['USD' + terms + '.close']
                    else:
                        terms_vals = 1 / terms_vals
                        self.cache['USD' + terms + '.close'] = terms_vals

                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']

                cross_vals = base_vals.div(terms_vals, axis='index')
                cross_vals.columns = [cr + '.close']

                base_vals.columns = [base_USD + '.close']
                terms_vals.columns = [terms_USD + '.close']
            else:
                # if base == 'USD': non_USD = terms
                # if terms == 'USD': non_USD = base

                correct_cr = self.fxconv.correct_notation(cr)

                market_data_request.tickers = correct_cr
                market_data_request.category = 'fx'

                if correct_cr + '.close' in self.cache:
                    cross_vals = self.cache[correct_cr + '.close']
                else:
                    cross_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)

                    # flip if not convention
                    if (correct_cr != cr):
                        if cr + '.close' in self.cache:
                            cross_vals = self.cache[cr + '.close']
                        else:
                            cross_vals = 1 / cross_vals
                            self.cache[cr + '.close'] = cross_vals

                    self.cache[correct_cr + '.close'] = cross_vals

                # cross_vals = self.market_data_generator.harvest_time_series(market_data_request)
                cross_vals.columns.names = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'daily':
                # download base USD cross
                market_data_request.tickers = base + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    base_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    x = 0

                # download terms USD cross
                market_data_request.tickers = terms + 'USD'
                market_data_request.category = 'fx-tot'

                if type == "tot":
                    terms_vals = self.market_data_generator.fetch_market_data(
                        market_data_request)
                else:
                    pass

                base_rets = self.calculations.calculate_returns(base_vals)
                terms_rets = self.calculations.calculate_returns(terms_vals)

                cross_rets = base_rets.sub(terms_rets.iloc[:, 0], axis=0)

                # first returns of a time series will be NaN, given we don't know the previous point
                cross_rets.iloc[0] = 0

                cross_vals = self.calculations.create_mult_index(cross_rets)
                cross_vals.columns = [cr + '-tot.close']

            elif freq == 'intraday':
                self.logger.info(
                    'Total calculated returns for intraday not implemented yet'
                )
                return None

        return cross_vals
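    # Hedged usage sketch (market_data_generator is assumed to be a findatapy
    # MarketDataGenerator; dates and tickers are illustrative):
    #
    #   fx = FXCrossFactory(market_data_generator=market_data_generator)
    #   gbpjpy = fx.get_fx_cross('01 Jan 2015', '01 Jan 2016', 'GBPJPY',
    #                            cut='NYC', source='bloomberg', freq='daily',
    #                            fields=['close'])
    #
    # A non-USD cross is rebuilt from its USD legs, e.g.
    # GBPJPY = GBPUSD / JPYUSD = GBPUSD * USDJPY.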
Code example #17
0
    def free_form_tickers_query(self, free_form_query, best_match_only=False,
                                list_query=False,
                                ret_fields=["category", "data_source", "freq",
                                            "cut", "tickers", "vendor_tickers",
                                            "fields"],
                                smart_group=True):
        """From a string or list of properties for predefined tickers, we 
        create a DataFrame that can be used to populate a MarketDataRequest. 
        We search through all the predefined tickers, and "guess" any matches 
        to our query, without having to use the standard query format which 
        consists of category.data_source.freq.cut.ticker such as this example
        fx.bloomberg.daily.NYC.EURUSD.close

        eg. quandl.fx will match all tickers which are from "quandl" and 
        have a "category" fx

        We must be careful to make sure that categories, data_sources  etc. 
        are unique and do not overlap with other properties like tickers

        Parameters
        ----------
        free_form_query : str
            A query that can be used to generate a MarketDataRequest

            eg. quandl.fx

        best_match_only : bool
            Only return at most 1 row of a DataFrame (default: False)

        list_query : bool
            Is this a list of tickers?

        ret_fields : str (list)
            Which properties of a MarketDataRequest to return

        smart_group : bool
            Smart group tickers of a particular category in a specific row

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager().getLogger(__name__)

        logger.info(
            "Finding ticker combination which matches " + str(free_form_query))

        df = ConfigManager._data_frame_time_series_tickers

        if list_query and isinstance(free_form_query, list):
            pass
        elif "," in free_form_query:
            free_form_query = free_form_query.split(",")
        else:
            free_form_query = [free_form_query]

        df_joined_list = []

        for key in free_form_query:
            df_joined = df

            key = ConfigManager.split_ticker_string(key)

            # Search through all the keywords, and see if any match the
            # columns of our predefined tickers
            try:
                for k in key:
                    for c in df.columns:
                        try:
                            df_temp = df_joined[df_joined[c] == k]
                        except:
                            df_temp = pd.DataFrame()

                        if not (df_temp.empty):
                            df_joined = df_temp
                            break

                df_joined_list.append(df_joined)
            except Exception as e:
                pass

        # Drop any duplicated tickers
        df = pd.concat(df_joined_list).drop_duplicates()

        if len(df.index) > 1:
            logger.info(
                "Found multiple matches for ticker combination, first "
                "trying smart group...")

            if smart_group:
                df = self.smart_group_dataframe_tickers(
                    df, ret_fields=ret_fields)

            if best_match_only:
                logger.info("Taking only top match...")
                df = pd.DataFrame(df.head(1))

        if ret_fields is not None and not (df.empty):
            df = df[ret_fields]

        return df
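
A hedged usage sketch of the free-form lookup above; the references to ConfigManager internals suggest this method lives on ConfigManager, but the instantiation and query strings below are illustrative only:

    # cm = ConfigManager().get_instance()
    #
    # match every predefined ticker sourced from "quandl" with category "fx"
    # df = cm.free_form_tickers_query("quandl.fx")
    #
    # keep only the single best match
    # df = cm.free_form_tickers_query("quandl.fx", best_match_only=True)
    #
    # the standard (non free-form) format remains
    # category.data_source.freq.cut.ticker, eg. fx.bloomberg.daily.NYC.EURUSD
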
Code example #18
class BBGLowLevelRef(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    # populate options for Bloomberg reference data request
    def fill_options(self, market_data_request):
        options = OptionsBBG()

        options.security = market_data_request.tickers
        options.startDateTime = market_data_request.start_date
        options.endDateTime = market_data_request.finish_date
        options.fields = market_data_request.fields

        options.overrides = market_data_request.overrides

        return options

    def process_message(self, msg):
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0
        single = False

        for securityData in list(securityDataArray.values()):

            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    self.logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        try:
                            field_val = re.findall(r'"(.*?)"', "%s" % row)[0]
                        except:
                            e = row.getElement(0)
                            # k = str(e.name())
                            field_val = e.getValue()

                        data[(field_name, ticker)][index] = field_val

                        index = index + 1
                else:
                    field_name = "%s" % field.name()
                    data[(field_name, ticker)][0] = field.getValueAsString()

                    index = index + 1
                    single = True  # no need to create a multi-index later, as there is just one row (needed e.g. for futures expiries)

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")

                print(errorInfo.getElementAsString("category"), ":", \
                      fieldException.getElementAsString("fieldId"))
                print("stop")

        # explicitly use from_dict (creating pandas.DataFrame(data) directly can be buggy)
        data_frame = pandas.DataFrame.from_dict(data)

        # an obsolete ticker could return no values
        if (not (data_frame.empty)):
            # if not(single):
            #    pass
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])

            self.logger.info("Reading: " + ticker + ' ' +
                             str(data_frame.index[0]) + ' - ' +
                             str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    def combine_slices(self, data_frame_cols, data_frame_slice):
        if (data_frame_slice.columns.get_level_values(1).values[0]
                not in data_frame_cols):
            # return data_frame.join(data_frame_slice, how="outer")
            return data_frame_slice

        return None

    # create request for data
    def send_bar_request(self, session, eventQueue, options, cid):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest('ReferenceDataRequest')

        self.add_override(request, 'TIME_ZONE_OVERRIDE', 23)  # force GMT time
        self.add_override(request, 'INCLUDE_EXPIRED_CONTRACTS',
                          "Y")  # include expired contracts
        self.add_override(request, 'START_DT',
                          options.startDateTime.strftime('%Y%m%d'))
        self.add_override(request, 'END_DT',
                          options.endDateTime.strftime('%Y%m%d'))

        # only one security/eventType per request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        if options.overrides != {}:
            for k in options.overrides.keys():
                new_k = k

                # is there a pretty name for this?
                if k in super().convert_override_fields:
                    new_k = super().convert_override_fields[k]

                self.add_override(request, new_k, options.overrides[k])

        self.logger.info("Sending Bloomberg Ref Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
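
The overrides loop above forwards user-supplied Bloomberg overrides onto the reference data request; a hedged sketch of how such a mapping might be set on the MarketDataRequest (the override names simply repeat those already hard-coded above and are not exhaustive):

    # market_data_request.overrides = {
    #     "TIME_ZONE_OVERRIDE": 23,           # force GMT time
    #     "INCLUDE_EXPIRED_CONTRACTS": "Y",   # include expired contracts
    # }
    #
    # any key found in convert_override_fields is first mapped to its
    # Bloomberg name before add_override is called on the request
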
Code example #19
class BacktestRequest(MarketDataRequest):
    """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc

    Used by TradingModel and Backtest to construct backtested returns for trading strategies

    """

    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None

        # output parameters for backtest (should we add returns statistics on legends, write CSVs with returns etc.)
        self.__plot_start = None
        self.__calc_stats = True
        self.__write_csv = False
        self.__write_csv_pnl = False
        self.__plot_interim = False
        self.__include_benchmark = False

        self.__tech_params = TechParams()

        # default parameters for portfolio level vol adjustment
        self.__portfolio_vol_adjust = False
        self.__portfolio_vol_period_shift = 0
        self.__portfolio_vol_rebalance_freq = None
        self.__portfolio_vol_resample_freq = None
        self.__portfolio_vol_resample_type = 'mean'
        self.__portfolio_vol_target = 0.1           # 10% vol target
        self.__portfolio_vol_max_leverage = None
        self.__portfolio_vol_periods = 20
        self.__portfolio_vol_obs_in_year = 252

        # default parameters for signal level vol adjustment
        self.__signal_vol_adjust = False
        self.__signal_vol_period_shift = 0
        self.__signal_vol_rebalance_freq = None
        self.__signal_vol_resample_freq = None      
        self.__signal_vol_resample_type = 'mean'
        self.__signal_vol_target = 0.1              # 10% vol target
        self.__signal_vol_max_leverage = None
        self.__signal_vol_periods = 20
        self.__signal_vol_obs_in_year = 252

        # portfolio notional size
        self.__portfolio_notional_size = None
        self.__portfolio_combination = None
        self.__portfolio_combination_weights = None
        
        # parameters for maximum position limits (expressed as whole portfolio)
        self.__max_net_exposure = None
        self.__max_abs_exposure = None

        self.__position_clip_rebalance_freq = None
        self.__position_clip_resample_freq = None  # by default apply max position criterion on last business day of month
        self.__position_clip_resample_type = 'mean'
        self.__position_clip_period_shift = 0

        # take profit and stop loss parameters
        self.__take_profit = None
        self.__stop_loss = None

        # should we delay the signal?
        self.__signal_delay = 0
        
    ##### properties for output of the backtest
    @property
    def plot_start(self): return self.__plot_start

    @plot_start.setter
    def plot_start(self, plot_start): self.__plot_start = plot_start
    
    @property
    def calc_stats(self): return self.__calc_stats

    @calc_stats.setter
    def calc_stats(self, calc_stats): self.__calc_stats = calc_stats
    
    @property
    def write_csv(self): return self.__write_csv

    @write_csv.setter
    def write_csv(self, write_csv): self.__write_csv = write_csv

    @property
    def write_csv_pnl(self):
        return self.__write_csv_pnl

    @write_csv_pnl.setter
    def write_csv_pnl(self, write_csv_pnl):
        self.__write_csv_pnl = write_csv_pnl
    
    @property
    def plot_interim(self): return self.__plot_interim

    @plot_interim.setter
    def plot_interim(self, plot_interim): self.__plot_interim = plot_interim
    
    @property
    def include_benchmark(self): return self.__include_benchmark

    @include_benchmark.setter
    def include_benchmark(self, include_benchmark): self.__include_benchmark = include_benchmark

    ##### properties for portfolio level volatility adjustment
    @property
    def portfolio_vol_adjust(self): return self.__portfolio_vol_adjust

    @portfolio_vol_adjust.setter
    def portfolio_vol_adjust(self, portfolio_vol_adjust): self.__portfolio_vol_adjust = portfolio_vol_adjust
    
    @property
    def portfolio_vol_rebalance_freq(self): return self.__portfolio_vol_rebalance_freq

    @portfolio_vol_rebalance_freq.setter
    def portfolio_vol_rebalance_freq(self, portfolio_vol_rebalance_freq): self.__portfolio_vol_rebalance_freq = portfolio_vol_rebalance_freq
    
    @property
    def portfolio_vol_resample_type(self): return self.__portfolio_vol_resample_type

    @portfolio_vol_resample_type.setter
    def portfolio_vol_resample_type(self, portfolio_vol_resample_type): self.__portfolio_vol_resample_type = portfolio_vol_resample_type

    @property
    def portfolio_vol_resample_freq(self): return self.__portfolio_vol_resample_freq

    @portfolio_vol_resample_freq.setter
    def portfolio_vol_resample_freq(self, portfolio_vol_resample_freq): self.__portfolio_vol_resample_freq = portfolio_vol_resample_freq

    @property
    def portfolio_vol_period_shift(self): return self.__portfolio_vol_period_shift

    @portfolio_vol_period_shift.setter
    def portfolio_vol_period_shift(self, portfolio_vol_period_shift): self.__portfolio_vol_period_shift = portfolio_vol_period_shift
    
    @property
    def portfolio_vol_target(self): return self.__portfolio_vol_target

    @portfolio_vol_target.setter
    def portfolio_vol_target(self, portfolio_vol_target): self.__portfolio_vol_target = portfolio_vol_target
    
    @property
    def portfolio_vol_max_leverage(self): return self.__portfolio_vol_max_leverage

    @portfolio_vol_max_leverage.setter
    def portfolio_vol_max_leverage(self, portfolio_vol_max_leverage): self.__portfolio_vol_max_leverage = portfolio_vol_max_leverage
    
    @property
    def portfolio_vol_periods(self): return self.__portfolio_vol_periods

    @portfolio_vol_periods.setter
    def portfolio_vol_periods(self, portfolio_vol_periods): self.__portfolio_vol_periods = portfolio_vol_periods
    
    @property
    def portfolio_vol_obs_in_year(self): return self.__portfolio_vol_obs_in_year

    @portfolio_vol_obs_in_year.setter
    def portfolio_vol_obs_in_year(self, portfolio_vol_obs_in_year): self.__portfolio_vol_obs_in_year = portfolio_vol_obs_in_year

    ##### properties for signal level vol adjustment
    @property
    def signal_vol_adjust(self): return self.__signal_vol_adjust

    @signal_vol_adjust.setter
    def signal_vol_adjust(self, signal_vol_adjust): self.__signal_vol_adjust = signal_vol_adjust
    
    @property
    def signal_vol_rebalance_freq(self): return self.__signal_vol_rebalance_freq

    @signal_vol_rebalance_freq.setter
    def signal_vol_rebalance_freq(self, signal_vol_rebalance_freq): self.__signal_vol_rebalance_freq = signal_vol_rebalance_freq
    
    @property
    def signal_vol_resample_type(self): return self.__signal_vol_resample_type

    @signal_vol_resample_type.setter
    def signal_vol_resample_type(self, signal_vol_resample_type): self.__signal_vol_resample_type = signal_vol_resample_type

    @property
    def signal_vol_resample_freq(self): return self.__signal_vol_resample_freq

    @signal_vol_resample_freq.setter
    def signal_vol_resample_freq(self, signal_vol_resample_freq): self.__signal_vol_resample_freq = signal_vol_resample_freq

    @property
    def signal_vol_period_shift(self): return self.__signal_vol_period_shift

    @signal_vol_period_shift.setter
    def signal_vol_period_shift(self, signal_vol_period_shift): self.__signal_vol_period_shift = signal_vol_period_shift

    @property
    def signal_vol_target(self): return self.__signal_vol_target

    @signal_vol_target.setter
    def signal_vol_target(self, signal_vol_target): self.__signal_vol_target = signal_vol_target

    @property
    def signal_vol_max_leverage(self): return self.__signal_vol_max_leverage

    @signal_vol_max_leverage.setter
    def signal_vol_max_leverage(self, signal_vol_max_leverage): self.__signal_vol_max_leverage = signal_vol_max_leverage

    @property
    def signal_vol_periods(self): return self.__signal_vol_periods

    @signal_vol_periods.setter
    def signal_vol_periods(self, signal_vol_periods): self.__signal_vol_periods = signal_vol_periods

    @property
    def signal_vol_obs_in_year(self): return self.__signal_vol_obs_in_year

    @signal_vol_obs_in_year.setter
    def signal_vol_obs_in_year(self, signal_vol_obs_in_year): self.__signal_vol_obs_in_year = signal_vol_obs_in_year
    
    ##### portfolio notional size
    @property
    def portfolio_notional_size(self): return self.__portfolio_notional_size

    @portfolio_notional_size.setter
    def portfolio_notional_size(self, portfolio_notional_size): self.__portfolio_notional_size = float(portfolio_notional_size)

    ##### portfolio combination style (sum, mean, weighted, weighted-sum)
    @property
    def portfolio_combination(self): return self.__portfolio_combination

    @portfolio_combination.setter
    def portfolio_combination(self, portfolio_combination): self.__portfolio_combination = portfolio_combination
    
    ##### portfolio weights (sum, mean)
    @property
    def portfolio_combination_weights(self): return self.__portfolio_combination_weights

    @portfolio_combination_weights.setter
    def portfolio_combination_weights(self, portfolio_combination_weights): self.__portfolio_combination_weights = portfolio_combination_weights
    
    ##### properties for maximum position constraints
    @property
    def max_net_exposure(self): return self.__max_net_exposure

    @max_net_exposure.setter
    def max_net_exposure(self, max_net_exposure): self.__max_net_exposure = max_net_exposure
    
    @property
    def max_abs_exposure(self): return self.__max_abs_exposure

    @max_abs_exposure.setter
    def max_abs_exposure(self, max_abs_exposure): self.__max_abs_exposure = max_abs_exposure
    
    @property
    def position_clip_rebalance_freq(self): return self.__position_clip_rebalance_freq

    @position_clip_rebalance_freq.setter
    def position_clip_rebalance_freq(self, position_clip_rebalance_freq): self.__position_clip_rebalance_freq = position_clip_rebalance_freq

    @property
    def position_clip_resample_type(self): return self.__position_clip_resample_type

    @position_clip_resample_type.setter
    def position_clip_resample_type(self, position_clip_resample_type): self.__position_clip_resample_type = position_clip_resample_type

    @property
    def position_clip_resample_freq(self): return self.__position_clip_resample_freq

    @position_clip_resample_freq.setter
    def position_clip_resample_freq(self, position_clip_resample_freq): self.__position_clip_resample_freq = position_clip_resample_freq

    @property
    def position_clip_period_shift(self): return self.__position_clip_period_shift

    @position_clip_period_shift.setter
    def position_clip_period_shift(self, position_clip_period_shift): self.__position_clip_period_shift = position_clip_period_shift

    ##### stop loss and take profit
    @property
    def stop_loss(self): return self.__stop_loss

    @stop_loss.setter
    def stop_loss(self, stop_loss): self.__stop_loss = stop_loss
    
    @property
    def take_profit(self): return self.__take_profit

    @take_profit.setter
    def take_profit(self, take_profit): self.__take_profit = take_profit

    ##### tech indicators and spot bp tc
    @property
    def tech_params(self): return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params): self.__tech_params = tech_params

    @property
    def spot_tc_bp(self): return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp): self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    #### FOR FUTURE USE ###

    @property
    def signal_name(self): return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name): self.__signal_name = signal_name

    @property
    def asset(self): return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset: self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self): return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument: self.logger.warning(instrument + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def signal_delay(self):
        return self.__signal_delay

    @signal_delay.setter
    def signal_delay(self, signal_delay):
        self.__signal_delay = signal_delay
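
A brief usage sketch of BacktestRequest, restricted to properties defined by the setters above (import paths omitted); note that the spot_tc_bp setter stores the cost as a fraction per side, so 0.5bp becomes 0.5 / (2.0 * 100.0 * 100.0) = 0.000025:

    br = BacktestRequest()

    br.start_date = "01 Jan 2010"       # parsed by MarketDataRequest.date_parser
    br.finish_date = "01 Jan 2020"

    br.portfolio_vol_adjust = True      # vol target the whole portfolio
    br.portfolio_vol_target = 0.1       # 10% vol target (the default)
    br.signal_vol_adjust = True
    br.signal_vol_target = 0.1

    br.spot_tc_bp = 0.5                 # stored internally as 0.000025
    br.signal_delay = 1                 # act on the delayed signal
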
Code example #20
    def __init__(self):
        super(BBGLowLevelRef, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)
Code example #21
File: filter.py  Project: pkan0583/findatapy
 def __init__(self):
     # self.config = ConfigManager()
     self.logger = LoggerManager().getLogger(__name__)
     return
Code example #22
class BBGLowLevelIntraday(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelIntraday, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

        # constants
        self.BAR_DATA = blpapi.Name("barData")
        self.BAR_TICK_DATA = blpapi.Name("barTickData")
        self.OPEN = blpapi.Name("open")
        self.HIGH = blpapi.Name("high")
        self.LOW = blpapi.Name("low")
        self.CLOSE = blpapi.Name("close")
        self.VOLUME = blpapi.Name("volume")
        self.NUM_EVENTS = blpapi.Name("numEvents")
        self.TIME = blpapi.Name("time")

    def combine_slices(self, data_frame_cols, data_frame_slice):
        # return data_frame.append(data_frame_slice)
        return data_frame_slice

    # populate options for Bloomberg intraday bar request
    def fill_options(self, market_data_request):
        options = OptionsBBG()

        options.security = market_data_request.tickers[
            0]  # get 1st ticker only!
        options.event = market_data_request.trade_side.upper()
        options.barInterval = market_data_request.freq_mult
        options.startDateTime = market_data_request.start_date
        options.endDateTime = market_data_request.finish_date
        options.gapFillInitialBar = False
        options.overrides = market_data_request.overrides

        if hasattr(options.startDateTime, 'microsecond'):
            options.startDateTime = options.startDateTime.replace(
                microsecond=0)

        if hasattr(options.endDateTime, 'microsecond'):
            options.endDateTime = options.endDateTime.replace(microsecond=0)

        return options

    # iterate through Bloomberg output creating a DataFrame output
    # implements abstract method
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        # self.logger.info("Processing intraday data for " + str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # self.logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pandas.DataFrame(data = data_matrix, index = date_index,
        #                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## for loop method is a touch slower
        # time_list = []
        # data_table = []

        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                  bar.getElementAsFloat(self.HIGH),
        #                  bar.getElementAsFloat(self.LOW),
        #                  bar.getElementAsFloat(self.CLOSE),
        #                  bar.getElementAsInteger(self.VOLUME),
        #                  bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # each price time point has multiple fields - marginally quicker
        bar_tuples = [([
            bar.getElementAsFloat(self.OPEN),
            bar.getElementAsFloat(self.HIGH),
            bar.getElementAsFloat(self.LOW),
            bar.getElementAsFloat(self.CLOSE),
            bar.getElementAsInteger(self.VOLUME),
            bar.getElementAsInteger(self.NUM_EVENTS)
        ], bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_tuples))
        time_list = list(map(itemgetter(1), bar_tuples))

        try:
            self.logger.info("Dates between " + str(time_list[0]) + " - " +
                             str(time_list[-1]))
        except:
            self.logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pandas.DataFrame(
            data=data_table,
            index=time_list,
            columns=['open', 'high', 'low', 'close', 'volume', 'events'])

    # implement abstract method: create request for data
    def send_bar_request(self, session, eventQueue, options, cid):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("IntradayBarRequest")

        # only one security/eventType per request
        request.set("security", options.security)
        request.set("eventType", options.event)
        request.set("interval", options.barInterval)

        # self.add_override(request, 'TIME_ZONE_OVERRIDE', 'GMT')

        if options.startDateTime and options.endDateTime:
            request.set("startDateTime", options.startDateTime)
            request.set("endDateTime", options.endDateTime)

        if options.gapFillInitialBar:
            request.append("gapFillInitialBar", True)

        self.logger.info("Sending Intraday Bloomberg Request...")

        session.sendRequest(request=request, correlationId=cid)
Code example #23
    def remove_time_series_cache_on_disk(self,
                                         fname,
                                         engine='hdf5_fixed',
                                         db_server='127.0.0.1',
                                         db_port='6379',
                                         timeout=10,
                                         username=None,
                                         password=None):

        logger = LoggerManager().getLogger(__name__)

        if 'hdf5' in engine:
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters (which bcolz can't deal with) to substitutes
            pass
        elif (engine == 'redis'):

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server,
                                      port=db_port,
                                      db=0,
                                      socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if (fname == 'flush_all_keys'):
                    r.flushall()
                else:
                    # allow deletion of keys by pattern matching

                    x = r.keys('*' + fname)

                    if len(x) > 0:
                        r.delete(x)

                    # r.delete(fname)

            except Exception as e:
                logger.warning("Cannot delete non-existent key " + fname +
                               " in Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            logger.info('Load MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" +
                    str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) +
                                        ":" + str(db_port),
                                        connect=False)

            store = Arctic(c,
                           socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            store.delete_library(fname)

            c.close()

            logger.info("Deleted MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass
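
A hedged usage sketch of the cache-removal helper above (the IOEngine class name holding this method is an assumption, and the key name is purely illustrative):

    # io_engine = IOEngine()
    #
    # delete any Redis keys ending with this pattern
    # io_engine.remove_time_series_cache_on_disk(
    #     "backtest.fx.quandl.daily.NYC", engine="redis")
    #
    # wipe the whole Redis cache
    # io_engine.remove_time_series_cache_on_disk(
    #     "flush_all_keys", engine="redis")
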
Code example #24
class MarketDataRequest(object):
    """Provides parameters for requesting market data.

    Includes parameters to define the ticker we'd like to fetch, the start and finish dates for our request, as well as
    the various fields we would like and also the frequency of the data.

    """

    # properties
    #
    # data_source eg. bbg, yahoo, quandl
    # start_date
    # finish_date
    # tickers (can be list) eg. EURUSD
    # category (eg. fx, equities, fixed_income, cal_event, fundamental)
    # freq_mult (eg. 1)
    # freq (tick, intraday or daily)
    # gran_freq (minute, hourly, daily, weekly, monthly, yearly)
    # fields (can be list)
    # vendor_tickers (optional)
    # vendor_fields (optional)
    # cache_algo (eg. internet, disk, memory) - internet will forcibly download from the internet
    # abstract_curve (optional)
    # environment (eg. prod, backtest) - old data is saved with prod, backtest will overwrite the last data point
    # overrides (optional) - if you need to specify any data overrides (eg. for BBG)

    def generate_key(self):
        """Generate a key to describe this MarketDataRequest object, which can be used in a cache, as a hash-style key

        Returns
        -------
        str
            Key to describe this MarketDataRequest

        """
        from findatapy.market.ioengine import SpeedCache

        if self.freq == 'daily': ticker = None
        else: ticker = self.tickers[0]

        self.__category_key = self.create_category_key(self, ticker=ticker)

        return SpeedCache().generate_key(self, [
            'logger', '_MarketDataRequest__abstract_curve',
            '_MarketDataRequest__cache_algo', '_MarketDataRequest__overrides'
        ])

    def __init__(self,
                 data_source=None,
                 start_date='year',
                 finish_date=datetime.datetime.utcnow(),
                 tickers=None,
                 category=None,
                 freq_mult=1,
                 freq="daily",
                 gran_freq=None,
                 cut="NYC",
                 fields=['close'],
                 cache_algo="internet_load_return",
                 vendor_tickers=None,
                 vendor_fields=None,
                 environment="backtest",
                 trade_side='trade',
                 expiry_date=None,
                 md_request=None,
                 abstract_curve=None,
                 overrides={}):

        self.logger = LoggerManager().getLogger(__name__)

        # can deep copy MarketDataRequest (use a lock, so can be used with threading when downloading time series)
        if md_request is not None:
            import threading
            lock = threading.Lock()

            with lock:
                import copy

                self.freq_mult = copy.deepcopy(md_request.freq_mult)

                # define frequency of data
                self.gran_freq = copy.deepcopy(md_request.gran_freq)
                self.freq_mult = copy.deepcopy(md_request.freq_mult)
                self.freq = copy.deepcopy(md_request.freq)

                # data source, start and fin
                self.data_source = copy.deepcopy(md_request.data_source)
                self.start_date = copy.deepcopy(md_request.start_date)
                self.finish_date = copy.deepcopy(md_request.finish_date)

                self.category = copy.deepcopy(
                    md_request.category)  # special predefined categories

                self.cut = copy.deepcopy(
                    md_request.cut
                )  # closing time of the data (eg. NYC, LDN, TOK etc)
                self.fields = copy.deepcopy(
                    md_request.fields)  # fields, eg. close, high, low, open
                self.cache_algo = copy.deepcopy(
                    md_request.cache_algo
                )  # internet_load_return (cache_algo_return is for future use)
                self.vendor_tickers = copy.deepcopy(
                    md_request.vendor_tickers)  # define vendor tickers
                self.vendor_fields = copy.deepcopy(
                    md_request.vendor_fields)  # define vendor fields
                self.environment = copy.deepcopy(
                    md_request.environment
                )  # backtest environment only supported at present
                self.trade_side = copy.deepcopy(md_request.trade_side)
                self.expiry_date = copy.deepcopy(md_request.expiry_date)
                # self.abstract_curve = copy.deepcopy(md_request.abstract_curve)
                self.overrides = copy.deepcopy(md_request.overrides)

                self.tickers = copy.deepcopy(
                    md_request.tickers
                )  # need this after category in case have wildcard
        else:
            self.freq_mult = freq_mult

            # define frequency of data
            self.gran_freq = gran_freq
            self.freq_mult = freq_mult
            self.freq = freq

            # data source, start and fin
            self.data_source = data_source
            self.start_date = start_date
            self.finish_date = finish_date
            self.category = category  # special predefined categories

            self.cut = cut  # closing time of the data (eg. NYC, LDN, TOK etc)
            self.fields = fields  # fields, eg. close, high, low, open
            self.cache_algo = cache_algo  # internet_load_return (cache_algo_return is for future use)
            self.vendor_tickers = vendor_tickers  # define vendor tickers
            self.vendor_fields = vendor_fields  # define vendor fields
            self.environment = environment  # backtest environment only supported at present
            self.trade_side = trade_side
            self.expiry_date = expiry_date
            self.abstract_curve = abstract_curve

            self.overrides = overrides

            self.tickers = tickers

    def create_category_key(self, market_data_request, ticker=None):
        """Returns a category key for the associated MarketDataRequest, which can be used to create filenames (or
        as part of a storage key in a cache)

        Parameters
        ----------
        market_data_request : MarketDataRequest
            contains various properties describing time series to fetched, including ticker, start & finish date etc.

        Returns
        -------
        str
        """

        category = 'default-cat'
        cut = 'default-cut'

        if market_data_request.category is not None:
            category = market_data_request.category

        environment = market_data_request.environment
        source = market_data_request.data_source
        freq = market_data_request.freq

        if market_data_request.cut is not None: cut = market_data_request.cut

        if (ticker is not None):
            key = str(environment) + "." + str(category) + '.' + str(source) + '.' + str(freq) + '.' + str(cut) \
                  + '.' + str(ticker)
        else:
            key = str(environment) + "." + str(category) + '.' + str(
                source) + '.' + str(freq) + '.' + str(cut)

        return key
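
    # For example, with environment="backtest", category="fx",
    # data_source="quandl", freq="daily", cut="NYC" and ticker="EURUSD",
    # the key assembled above would be (values purely illustrative):
    #
    #     backtest.fx.quandl.daily.NYC.EURUSD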

    @property
    def data_source(self):
        return self.__data_source

    @data_source.setter
    def data_source(self, data_source):
        try:
            valid_data_source = [
                'ats', 'bloomberg', 'dukascopy', 'fred', 'gain', 'google',
                'quandl', 'yahoo'
            ]

            if data_source not in valid_data_source:
                self.logger.warning(data_source
                                    + " is not a defined data source.")
        except:
            pass

        self.__data_source = data_source

    @property
    def category(self):
        return self.__category

    @category.setter
    def category(self, category):
        self.__category = category

    @property
    def tickers(self):
        return self.__tickers

    @tickers.setter
    def tickers(self, tickers):
        if tickers is not None:
            if not isinstance(tickers, list):
                tickers = [tickers]

        config = None

        new_tickers = []

        if tickers is not None:
            for tick in tickers:
                if '*' in tick:
                    start = ''

                    if tick[-1] == "*" and tick[0] != "*":
                        start = "^"

                    tick = start + "(" + tick.replace('*', '') + ")"

                    if config is None:
                        from findatapy.util import ConfigManager
                        config = ConfigManager().get_instance()

                    new_tickers.append(
                        config.get_filtered_tickers_list_for_category(
                            self.__category, self.__data_source, self.__freq,
                            self.__cut, tick))
                else:
                    new_tickers.append(tick)

            new_tickers = self._flatten_list(new_tickers)

            self.__tickers = new_tickers
        else:
            self.__tickers = tickers
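
    # For example, a ticker such as "EUR*" is rewritten above to the regular
    # expression "^(EUR)" and handed to
    # ConfigManager.get_filtered_tickers_list_for_category, which expands it
    # into every predefined ticker for the current
    # category/data_source/freq/cut whose name matches (the "EUR*" pattern
    # here is illustrative).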

    @property
    def fields(self):
        return self.__fields

    @fields.setter
    def fields(self, fields):
        valid_fields = ['open', 'high', 'low', 'close', 'volume', 'numEvents']

        if not isinstance(fields, list):
            fields = [fields]

        for field_entry in fields:
            if field_entry not in valid_fields:
                pass
                # self.logger.warning(field_entry + " is not a valid field.")

        # add error checking

        self.__fields = fields

    @property
    def vendor_tickers(self):
        return self.__vendor_tickers

    @vendor_tickers.setter
    def vendor_tickers(self, vendor_tickers):
        if vendor_tickers is not None:
            if not isinstance(vendor_tickers, list):
                vendor_tickers = [vendor_tickers]

        self.__vendor_tickers = vendor_tickers

    @property
    def vendor_fields(self):
        return self.__vendor_fields

    @vendor_fields.setter
    def vendor_fields(self, vendor_fields):
        if vendor_fields is not None:
            if not isinstance(vendor_fields, list):
                vendor_fields = [vendor_fields]

        self.__vendor_fields = vendor_fields

    @property
    def freq(self):
        return self.__freq

    @freq.setter
    def freq(self, freq):
        freq = freq.lower()

        valid_freq = [
            'tick', 'second', 'minute', 'intraday', 'hourly', 'daily',
            'weekly', 'monthly', 'quarterly', 'annually'
        ]

        if not freq in valid_freq:
            self.logger.warning(freq + " is not a defined frequency")

        self.__freq = freq

    @property
    def gran_freq(self):
        return self.__gran_freq

    @gran_freq.setter
    def gran_freq(self, gran_freq):
        try:
            gran_freq = gran_freq.lower()

            valid_gran_freq = [
                'tick', 'second', 'minute', 'hourly', 'pseudodaily', 'daily',
                'weekly', 'monthly', 'quarterly', 'annually'
            ]

            if gran_freq not in valid_gran_freq:
                self.logger.warning(gran_freq + " is not a defined frequency")

            if gran_freq in ['minute', 'hourly']:
                self.__freq = 'intraday'
            elif gran_freq in ['tick', 'second']:
                self.__freq = 'tick'
            else:
                self.__freq = 'daily'
        except:
            pass

        self.__gran_freq = gran_freq

    @property
    def freq_mult(self):
        return self.__freq_mult

    @freq_mult.setter
    def freq_mult(self, freq_mult):
        self.__freq_mult = freq_mult

    @property
    def start_date(self):
        return self.__start_date

    @start_date.setter
    def start_date(self, start_date):
        self.__start_date = self.date_parser(start_date)

    @property
    def finish_date(self):
        return self.__finish_date

    @finish_date.setter
    def finish_date(self, finish_date):
        self.__finish_date = self.date_parser(finish_date)

    @property
    def cut(self):
        return self.__cut

    @cut.setter
    def cut(self, cut):
        self.__cut = cut

    def date_parser(self, date):
        if isinstance(date, str):

            date1 = datetime.datetime.utcnow()

            if date == 'midnight':
                date1 = datetime.datetime(date1.year, date1.month, date1.day,
                                          0, 0, 0)
            elif date == 'decade':
                date1 = date1 - timedelta(days=365 * 10)
            elif date == 'year':
                date1 = date1 - timedelta(days=365)
            elif date == 'month':
                date1 = date1 - timedelta(days=30)
            elif date == 'week':
                date1 = date1 - timedelta(days=7)
            elif date == 'day':
                date1 = date1 - timedelta(days=1)
            elif date == 'hour':
                date1 = date1 - timedelta(hours=1)
            else:
                # format expected 'Jun 1 2005 01:33', '%b %d %Y %H:%M'
                try:
                    date1 = datetime.datetime.strptime(date, '%b %d %Y %H:%M')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0

                # format expected '1 Jun 2005 01:33', '%d %b %Y %H:%M'
                try:
                    date1 = datetime.datetime.strptime(date, '%d %b %Y %H:%M')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0

                try:
                    date1 = datetime.datetime.strptime(date, '%b %d %Y')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0

                try:
                    date1 = datetime.datetime.strptime(date, '%d %b %Y')
                except:
                    # self.logger.warning("Attempted to parse date")
                    i = 0
        else:
            import pandas

            date1 = pandas.Timestamp(date)

        return date1
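
    # For example (all relative to the current UTC time, as computed above):
    #
    #     date_parser("week")             -> now minus 7 days
    #     date_parser("month")            -> now minus 30 days
    #     date_parser("year")             -> now minus 365 days
    #     date_parser("Jun 1 2005 01:33") -> datetime(2005, 6, 1, 1, 33)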

    @property
    def cache_algo(self):
        return self.__cache_algo

    @cache_algo.setter
    def cache_algo(self, cache_algo):
        cache_algo = cache_algo.lower()

        valid_cache_algo = [
            'internet_load', 'internet_load_return', 'cache_algo',
            'cache_algo_return'
        ]

        if not cache_algo in valid_cache_algo:
            self.logger.warning(cache_algo +
                                " is not a defined caching scheme")

        self.__cache_algo = cache_algo

    @property
    def environment(self):
        return self.__environment

    @environment.setter
    def environment(self, environment):
        environment = environment.lower()

        valid_environment = ['prod', 'backtest']

        if not environment in valid_environment:
            self.logger.warning(environment + " is not a defined environment.")

        self.__environment = environment

    @property
    def trade_side(self):
        return self.__trade_side

    @trade_side.setter
    def trade_side(self, trade_side):
        trade_side = trade_side.lower()

        valid_trade_side = ['trade', 'bid', 'ask']

        if not trade_side in valid_trade_side:
            self.logger.warning(trade_side + " is not a defined trade side.")

        self.__trade_side = trade_side

    @property
    def expiry_date(self):
        return self.__expiry_date

    @expiry_date.setter
    def expiry_date(self, expiry_date):
        self.__expiry_date = self.date_parser(expiry_date)

    @property
    def abstract_curve(self):
        return self.__abstract_curve

    @abstract_curve.setter
    def abstract_curve(self, abstract_curve):
        if abstract_curve is not None:
            self.__abstract_curve_key = abstract_curve.generate_key()
        else:
            self.__abstract_curve_key = None

        self.__abstract_curve = abstract_curve

    @property
    def overrides(self):
        return self.__overrides

    @overrides.setter
    def overrides(self, overrides):
        self.__overrides = overrides

    def _flatten_list(self, list_of_lists):
        """Flattens list, particularly useful for combining baskets

        Parameters
        ----------
        list_of_lists : str (list)
            List to be flattened

        Returns
        -------

        """
        result = []

        for i in list_of_lists:
            # Only append if i is a string
            if isinstance(i, str):
                result.append(i)
            # Otherwise call this function recursively
            else:
                result.extend(self._flatten_list(i))
        return result
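
The md_request argument of the constructor above performs a locked deep copy of an existing request; a minimal sketch of cloning and then tweaking one (md_request is assumed to be an already-populated MarketDataRequest):

    md_request_copy = MarketDataRequest(md_request=md_request)

    # the clone can be modified without touching the original
    md_request_copy.fields = ["high"]
    md_request_copy.freq = "intraday"
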
Code example #25
    def read_time_series_cache_from_disk(self,
                                         fname,
                                         engine='hdf5',
                                         start_date=None,
                                         finish_date=None,
                                         db_server=constants.db_server,
                                         db_port=constants.db_port,
                                         username=constants.db_username,
                                         password=constants.db_password):
        """Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hdf5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
            'parquet' - reads from Parquet file
            'redis' - reads from Redis cache
        start_date : str/datetime (optional)
            Start date
        finish_date : str/datetime (optional)
            Finish date
        db_server : str
            IP address of the MongoDB/Redis server (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager().getLogger(__name__)

        data_frame_list = []

        if not (isinstance(fname, list)):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            if engine == 'parquet' and '.gzip' not in fname_single and '.parquet' not in fname_single:
                fname_single = fname_single + '.parquet'

            if (engine == 'bcolz'):
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(
                        data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif (engine == 'redis'):
                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    # for pyarrow
                    context = pa.default_serialization_context()

                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)

                    # is there a compressed key stored?
                    k = r.keys('comp_*_' + fname_single)

                    # if so, it means we have stored it as a compressed object
                    # if there is more than 1 element, take the last (which will be the latest added)
                    if (len(k) >= 1):
                        k = k[-1].decode('utf-8')

                        comp = r.get(k)

                        siz = int(k.split('_')[1])
                        dec = pa.decompress(comp,
                                            codec='lz4',
                                            decompressed_size=siz)

                        msg = context.deserialize(dec)
                    else:
                        msg = r.get(fname_single)

                        # print(fname_single)
                        if msg is not None:
                            msg = context.deserialize(msg)
                            # logger.warning("Key " + fname_single + " not in Redis cache?")

                except Exception as e:
                    logger.info("Cache not existent for " + fname_single +
                                " in Redis: " + str(e))

                if msg is None:
                    data_frame = None
                else:
                    logger.info('Load Redis cache: ' + fname_single)

                    data_frame = msg  # pandas.read_msgpack(msg)

            elif (engine == 'arctic'):
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" +
                        str(db_server) + ":" + str(db_port),
                        connect=False
                    )  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" +
                                            str(db_server) + ":" +
                                            str(db_port),
                                            connect=False)

                store = Arctic(c,
                               socketTimeoutMS=socketTimeoutMS,
                               serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)

                    else:
                        from arctic.date import DateRange
                        item = library.read(
                            fname_single,
                            date_range=DateRange(
                                start_date.replace(tzinfo=None),
                                finish_date.replace(tzinfo=None)))

                    c.close()

                    logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    logger.warning('Library may not exist or another error: ' +
                                   fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif self.path_exists(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if ('intraday' in fname_single):
                    data_frame = data_frame.astype('float32')

                store.close()

            elif self.path_exists(fname_single) and '.csv' in fname_single:
                data_frame = pandas.read_csv(fname_single, index_col=0)

                data_frame.index = pd.to_datetime(data_frame.index)

            elif self.path_exists(fname_single):
                data_frame = self.read_parquet(fname_single)
                # data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list
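
A hedged usage sketch of the cache reader above (again assuming an IOEngine instance holds the method; the key names are illustrative):

    # io_engine = IOEngine()
    #
    # read a Parquet cache file (".parquet" is appended automatically if missing)
    # df = io_engine.read_time_series_cache_from_disk(
    #     "backtest.fx.quandl.daily.NYC", engine="parquet")
    #
    # read the same key back from a local Redis cache
    # df = io_engine.read_time_series_cache_from_disk(
    #     "backtest.fx.quandl.daily.NYC", engine="redis")
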
Code example #26
    def __init__(self,
                 data_source=None,
                 start_date='year',
                 finish_date=datetime.datetime.utcnow(),
                 tickers=None,
                 category=None,
                 freq_mult=1,
                 freq="daily",
                 gran_freq=None,
                 cut="NYC",
                 fields=['close'],
                 cache_algo="internet_load_return",
                 vendor_tickers=None,
                 vendor_fields=None,
                 environment="backtest",
                 trade_side='trade',
                 expiry_date=None,
                 md_request=None,
                 abstract_curve=None,
                 overrides={}):

        self.logger = LoggerManager().getLogger(__name__)

        # can deep copy MarketDataRequest (use a lock, so can be used with threading when downloading time series)
        if md_request is not None:
            import threading
            lock = threading.Lock()

            with lock:
                import copy

                self.freq_mult = copy.deepcopy(md_request.freq_mult)

                # define frequency of data
                self.gran_freq = copy.deepcopy(md_request.gran_freq)
                self.freq_mult = copy.deepcopy(md_request.freq_mult)
                self.freq = copy.deepcopy(md_request.freq)

                # data source, start and fin
                self.data_source = copy.deepcopy(md_request.data_source)
                self.start_date = copy.deepcopy(md_request.start_date)
                self.finish_date = copy.deepcopy(md_request.finish_date)

                self.category = copy.deepcopy(
                    md_request.category)  # special predefined categories

                self.cut = copy.deepcopy(
                    md_request.cut
                )  # closing time of the data (eg. NYC, LDN, TOK etc)
                self.fields = copy.deepcopy(
                    md_request.fields)  # fields, eg. close, high, low, open
                self.cache_algo = copy.deepcopy(
                    md_request.cache_algo
                )  # internet_load_return (cache_algo_return is for future use)
                self.vendor_tickers = copy.deepcopy(
                    md_request.vendor_tickers)  # define vendor tickers
                self.vendor_fields = copy.deepcopy(
                    md_request.vendor_fields)  # define vendor fields
                self.environment = copy.deepcopy(
                    md_request.environment
                )  # backtest environment only supported at present
                self.trade_side = copy.deepcopy(md_request.trade_side)
                self.expiry_date = copy.deepcopy(md_request.expiry_date)
                # self.abstract_curve = copy.deepcopy(md_request.abstract_curve)
                self.overrides = copy.deepcopy(md_request.overrides)

                self.tickers = copy.deepcopy(
                    md_request.tickers
                )  # need this after category in case have wildcard
        else:
            self.freq_mult = freq_mult

            # define frequency of data
            self.gran_freq = gran_freq
            self.freq_mult = freq_mult
            self.freq = freq

            # data source, start and fin
            self.data_source = data_source
            self.start_date = start_date
            self.finish_date = finish_date
            self.category = category  # special predefined categories

            self.cut = cut  # closing time of the data (eg. NYC, LDN, TOK etc)
            self.fields = fields  # fields, eg. close, high, low, open
            self.cache_algo = cache_algo  # internet_load_return (cache_algo_return is for future use)
            self.vendor_tickers = vendor_tickers  # define vendor tickers
            self.vendor_fields = vendor_fields  # define vendor fields
            self.environment = environment  # backtest environment only supported at present
            self.trade_side = trade_side
            self.expiry_date = expiry_date
            self.abstract_curve = abstract_curve

            self.overrides = overrides

            self.tickers = tickers
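
The constructor above supports copy-construction: passing an existing request via md_request deep-copies every field under a lock, so a request can be cloned safely even when several download threads are running. A minimal usage sketch, assuming the constructor belongs to findatapy's MarketDataRequest (the ticker and vendor ticker values are illustrative only):

from findatapy.market import MarketDataRequest

# build a template request once (illustrative parameters)
template = MarketDataRequest(start_date="01 Jan 2015", finish_date="01 Jan 2016",
                             freq="daily", data_source="quandl",
                             tickers=["EURUSD"], fields=["close"],
                             vendor_tickers=["FRED/DEXUSEU"], vendor_fields=["close"],
                             cache_algo="internet_load_return")

# clone it via the md_request branch of the constructor; the copy is independent,
# so per-ticker tweaks do not mutate the template
md_copy = MarketDataRequest(md_request=template)
md_copy.tickers = ["GBPUSD"]
md_copy.vendor_tickers = ["FRED/DEXUSUK"]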
Code example #27
0
    def process_message(self, msg):
        data = msg.getElement(self.BAR_DATA).getElement(self.BAR_TICK_DATA)

        logger = LoggerManager().getLogger(__name__)

        # logger.info("Processing intraday data for " + str(self._options.security))

        data_vals = list(data.values())

        # data_matrix = numpy.zeros([len(data_vals), 6])
        # data_matrix.fill(numpy.nan)
        #
        # date_index = [None] * len(data_vals)
        #
        # for i in range(0, len(data_vals)):
        #     data_matrix[i][0] = data_vals[i].getElementAsFloat(self.OPEN)
        #     data_matrix[i][1] = data_vals[i].getElementAsFloat(self.HIGH)
        #     data_matrix[i][2] = data_vals[i].getElementAsFloat(self.LOW)
        #     data_matrix[i][3] = data_vals[i].getElementAsFloat(self.CLOSE)
        #     data_matrix[i][4] = data_vals[i].getElementAsInteger(self.VOLUME)
        #     data_matrix[i][5] = data_vals[i].getElementAsInteger(self.NUM_EVENTS)
        #
        #     date_index[i] = data_vals[i].getElementAsDatetime(self.TIME)
        #
        # logger.info("Dates between " + str(date_index[0]) + " - " + str(date_index[-1]))
        #
        # # create pandas dataframe with the Bloomberg output
        # return pd.DataFrame(data = data_matrix, index = date_index,
        #                columns=['open', 'high', 'low', 'close', 'volume', 'events'])

        ## the for-loop method is a touch slower
        # time_list = []
        # data_table = []

        # for bar in data_vals:
        #     data_table.append([bar.getElementAsFloat(self.OPEN),
        #                  bar.getElementAsFloat(self.HIGH),
        #                  bar.getElementAsFloat(self.LOW),
        #                  bar.getElementAsFloat(self.CLOSE),
        #                  bar.getElementAsInteger(self.VOLUME),
        #                  bar.getElementAsInteger(self.NUM_EVENTS)])
        #
        #     time_list.append(bar.getElementAsDatetime(self.TIME))

        # each price time point has multiple fields - marginally quicker
        bar_tuples = [([
            bar.getElementAsFloat(self.OPEN),
            bar.getElementAsFloat(self.HIGH),
            bar.getElementAsFloat(self.LOW),
            bar.getElementAsFloat(self.CLOSE),
            bar.getElementAsInteger(self.VOLUME),
            bar.getElementAsInteger(self.NUM_EVENTS)
        ], bar.getElementAsDatetime(self.TIME)) for bar in data_vals]

        data_table = list(map(itemgetter(0), bar_tuples))
        time_list = list(map(itemgetter(1), bar_tuples))

        try:
            logger.info("Dates between " + str(time_list[0]) + " - " +
                        str(time_list[-1]))
        except IndexError:
            logger.info("No dates retrieved")
            return None

        # create pandas dataframe with the Bloomberg output
        return pd.DataFrame(
            data=data_table,
            index=time_list,
            columns=['open', 'high', 'low', 'close', 'volume', 'events'])
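
The list comprehension above packs each bar's six fields alongside its timestamp, then splits the pairs with itemgetter before building the DataFrame. A self-contained sketch of the same pattern with synthetic bars (no Bloomberg session required; the Bar namedtuple is purely illustrative):

import datetime
from collections import namedtuple
from operator import itemgetter

import pandas as pd

# synthetic stand-in for the Bloomberg bar objects
Bar = namedtuple("Bar", ["open", "high", "low", "close", "volume", "events", "time"])

data_vals = [Bar(1.10, 1.11, 1.09, 1.105, 1000, 50,
                 datetime.datetime(2020, 1, 1, 9, minute)) for minute in range(3)]

# pack fields and timestamp per bar, then split into values and index
bar_tuples = [([b.open, b.high, b.low, b.close, b.volume, b.events], b.time)
              for b in data_vals]

data_table = list(map(itemgetter(0), bar_tuples))
time_list = list(map(itemgetter(1), bar_tuples))

df = pd.DataFrame(data=data_table, index=time_list,
                  columns=["open", "high", "low", "close", "volume", "events"])
print(df)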
Code example #28
0
    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)
        self._techind = None
        self._signal = None
Code example #29
0
    def load_time_series(self, market_data_request):

        # if(BBGLowLevelTemplate._session is None):
        logger = LoggerManager().getLogger(__name__)

        session = self.start_bloomberg_session()
        # else:
        #    session = BBGLowLevelTemplate._session

        try:
            # if can't open the session, kill existing one
            # then try reopen (up to 5 times...)
            i = 0

            while i < 5:
                if session is not None:
                    if not session.openService("//blp/refdata"):
                        logger.info("Try reopening Bloomberg session... try " +
                                    str(i))
                        self.kill_session(
                            session
                        )  # need to forcibly kill_session since can't always reopen
                        session = self.start_bloomberg_session()

                        if session is not None:
                            if session.openService("//blp/refdata"): i = 6
                else:
                    logger.info("Try opening Bloomberg session... try " +
                                str(i))
                    session = self.start_bloomberg_session()

                i = i + 1

            # give error if still doesn't work after several tries..
            if not session.openService("//blp/refdata"):
                logger.error("Failed to open //blp/refdata")

                return

            logger.info("Creating request...")

            eventQueue = blpapi.EventQueue()
            # eventQueue = None

            # create a request
            from blpapi import CorrelationId
            cid = CorrelationId()
            options = self.fill_options(market_data_request)

            if options.security is not None:
                self.send_bar_request(session, eventQueue, options, cid)

                logger.info("Waiting for data to be returned...")

                data_frame = self.event_loop(session)
            else:
                logger.warning("No ticker or field specified!")

                data_frame = None
        finally:
            # stop the session (will fail if NoneType)
            try:
                session.stop()
            except:
                pass

        return data_frame
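
The retry loop above keeps trying to open the //blp/refdata service, forcibly killing and recreating the Bloomberg session between attempts. The same bounded-retry shape, isolated from blpapi, looks like the sketch below (create_session and open_service are hypothetical placeholders standing in for the Bloomberg calls; the real code additionally kills the stale session before recreating it):

def open_with_retries(create_session, open_service, max_tries=5):
    """Try to open a service, recreating the session between failed attempts."""
    session = create_session()

    for attempt in range(max_tries):
        if session is not None and open_service(session):
            return session  # success

        # recreate the session and try again
        session = create_session()

    raise RuntimeError("Failed to open service after %d tries" % max_tries)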
Code example #30
0
File: backtestrequest.py Project: frrp/finmarketpy
    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()
Code example #31
0
    def process_message(self, msg):
        logger = LoggerManager().getLogger(__name__)
        data = collections.defaultdict(dict)

        # process received events
        securityDataArray = msg.getElement('securityData')

        index = 0
        single = False

        for securityData in list(securityDataArray.values()):

            ticker = securityData.getElementAsString("security")
            fieldData = securityData.getElement("fieldData")

            for field in fieldData.elements():
                if not field.isValid():
                    field_name = "%s" % field.name()

                    logger.error(field_name + " is NULL")
                elif field.isArray():
                    # iterate over complex data returns.
                    field_name = "%s" % field.name()

                    for i, row in enumerate(field.values()):
                        try:
                            field_val = re.findall(r'"(.*?)"', "%s" % row)[0]
                        except:
                            e = row.getElement(0)
                            # k = str(e.name())
                            field_val = e.getValue()

                        data[(field_name, ticker)][index] = field_val

                        index = index + 1
                else:
                    field_name = "%s" % field.name()
                    data[(field_name, ticker)][0] = field.getValueAsString()

                    index = index + 1
                    single = True  # only one row, so no need to create a multi-index later (needed for futures expiries)

            fieldExceptionArray = securityData.getElement("fieldExceptions")

            for fieldException in list(fieldExceptionArray.values()):
                errorInfo = fieldException.getElement("errorInfo")

                print(errorInfo.getElementAsString("category"), ":",
                      fieldException.getElementAsString("fieldId"))

        # explicitly use from_dict (creating pd.DataFrame(data) directly can be buggy here)
        data_frame = pd.DataFrame.from_dict(data)

        # if obsolete ticker could return no values
        if not data_frame.empty:
            # if not(single):
            #    pass
            # data_frame.columns = pd.MultiIndex.from_tuples(data, names=['field', 'ticker'])

            logger.info("Reading: " + ticker + ' ' + str(data_frame.index[0]) +
                        ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame
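
The keys of the data dictionary above are (field, ticker) tuples, so pd.DataFrame.from_dict naturally produces tuple-labelled columns that map onto a two-level structure. A small sketch of that behaviour with toy values (the field and ticker names are illustrative):

import collections
import pandas as pd

data = collections.defaultdict(dict)

# (field, ticker) tuple keys, row index as the inner dict key
data[("PX_LAST", "EURUSD Curncy")][0] = 1.10
data[("PX_LAST", "EURUSD Curncy")][1] = 1.11
data[("PX_LAST", "GBPUSD Curncy")][0] = 1.30
data[("PX_LAST", "GBPUSD Curncy")][1] = 1.31

# tuple keys become the column labels; naming them as a MultiIndex makes the
# (field, ticker) levels explicit
data_frame = pd.DataFrame.from_dict(data)
data_frame.columns = pd.MultiIndex.from_tuples(data_frame.columns,
                                               names=["field", "ticker"])
print(data_frame)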
Code example #32
0
File: backtestrequest.py Project: frrp/finmarketpy
class BacktestRequest(MarketDataRequest):
    """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc

    Used by TradingModel and Backtest to construct backtested returns for trading strategies

    """
    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None
        self.__tech_params = TechParams()

    @property
    def signal_name(self):
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def tech_params(self):
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            self.logger.warning(instrument
                                + " is not a defined trading instrument.")

        self.__instrument = instrument
Code example #33
0
class BacktestRequest(MarketDataRequest):
    """Contains parameters necessary to define a backtest, including start date, finish date, transaction cost, etc

    Used by TradingModel and Backtest to construct backtested returns for trading strategies

    """
    def __init__(self):
        super(MarketDataRequest, self).__init__()
        self.logger = LoggerManager().getLogger(__name__)

        self.__signal_name = None

        # output parameters for backtest (should we add returns statistics on legends, write CSVs with returns etc.)
        self.__plot_start = None
        self.__calc_stats = True
        self.__write_csv = False
        self.__plot_interim = False
        self.__include_benchmark = False

        self.__tech_params = TechParams()

        # default parameters for portfolio level vol adjustment
        self.__portfolio_vol_adjust = False
        self.__portfolio_vol_period_shift = 0
        self.__portfolio_vol_rebalance_freq = None
        self.__portfolio_vol_resample_freq = None
        self.__portfolio_vol_resample_type = 'mean'
        self.__portfolio_vol_target = 0.1  # 10% vol target
        self.__portfolio_vol_max_leverage = None
        self.__portfolio_vol_periods = 20
        self.__portfolio_vol_obs_in_year = 252

        # default parameters for signal level vol adjustment
        self.__signal_vol_adjust = False
        self.__signal_vol_period_shift = 0
        self.__signal_vol_rebalance_freq = None
        self.__signal_vol_resample_freq = None
        self.__signal_vol_resample_type = 'mean'
        self.__signal_vol_target = 0.1  # 10% vol target
        self.__signal_vol_max_leverage = None
        self.__signal_vol_periods = 20
        self.__signal_vol_obs_in_year = 252

        # portfolio notional size
        self.__portfolio_notional_size = None
        self.__portfolio_combination = None

        # parameters for maximum position limits (expressed as whole portfolio)
        self.__max_net_exposure = None
        self.__max_abs_exposure = None

        self.__position_clip_rebalance_freq = None
        self.__position_clip_resample_freq = None  # by default apply max position criterion on last business day of month
        self.__position_clip_resample_type = 'mean'
        self.__position_clip_period_shift = 0

        # take profit and stop loss parameters
        self.__take_profit = None
        self.__stop_loss = None

        # should we delay the signal?
        self.__signal_delay = 0

    ##### properties for output of the backtest
    @property
    def plot_start(self):
        return self.__plot_start

    @plot_start.setter
    def plot_start(self, plot_start):
        self.__plot_start = plot_start

    @property
    def calc_stats(self):
        return self.__calc_stats

    @calc_stats.setter
    def calc_stats(self, calc_stats):
        self.__calc_stats = calc_stats

    @property
    def write_csv(self):
        return self.__write_csv

    @write_csv.setter
    def write_csv(self, write_csv):
        self.__write_csv = write_csv

    @property
    def plot_interim(self):
        return self.__plot_interim

    @plot_interim.setter
    def plot_interim(self, plot_interim):
        self.__plot_interim = plot_interim

    @property
    def include_benchmark(self):
        return self.__include_benchmark

    @include_benchmark.setter
    def include_benchmark(self, include_benchmark):
        self.__include_benchmark = include_benchmark

    ##### properties for portfolio level volatility adjustment
    @property
    def portfolio_vol_adjust(self):
        return self.__portfolio_vol_adjust

    @portfolio_vol_adjust.setter
    def portfolio_vol_adjust(self, portfolio_vol_adjust):
        self.__portfolio_vol_adjust = portfolio_vol_adjust

    @property
    def portfolio_vol_rebalance_freq(self):
        return self.__portfolio_vol_rebalance_freq

    @portfolio_vol_rebalance_freq.setter
    def portfolio_vol_rebalance_freq(self, portfolio_vol_rebalance_freq):
        self.__portfolio_vol_rebalance_freq = portfolio_vol_rebalance_freq

    @property
    def portfolio_vol_resample_type(self):
        return self.__portfolio_vol_resample_type

    @portfolio_vol_resample_type.setter
    def portfolio_vol_resample_type(self, portfolio_vol_resample_type):
        self.__portfolio_vol_resample_type = portfolio_vol_resample_type

    @property
    def portfolio_vol_resample_freq(self):
        return self.__portfolio_vol_resample_freq

    @portfolio_vol_resample_freq.setter
    def portfolio_vol_resample_freq(self, portfolio_vol_resample_freq):
        self.__portfolio_vol_resample_freq = portfolio_vol_resample_freq

    @property
    def portfolio_vol_period_shift(self):
        return self.__portfolio_vol_period_shift

    @portfolio_vol_period_shift.setter
    def portfolio_vol_period_shift(self, portfolio_vol_period_shift):
        self.__portfolio_vol_period_shift = portfolio_vol_period_shift

    @property
    def portfolio_vol_target(self):
        return self.__portfolio_vol_target

    @portfolio_vol_target.setter
    def portfolio_vol_target(self, portfolio_vol_target):
        self.__portfolio_vol_target = portfolio_vol_target

    @property
    def portfolio_vol_max_leverage(self):
        return self.__portfolio_vol_max_leverage

    @portfolio_vol_max_leverage.setter
    def portfolio_vol_max_leverage(self, portfolio_vol_max_leverage):
        self.__portfolio_vol_max_leverage = portfolio_vol_max_leverage

    @property
    def portfolio_vol_periods(self):
        return self.__portfolio_vol_periods

    @portfolio_vol_periods.setter
    def portfolio_vol_periods(self, portfolio_vol_periods):
        self.__portfolio_vol_periods = portfolio_vol_periods

    @property
    def portfolio_vol_obs_in_year(self):
        return self.__portfolio_vol_obs_in_year

    @portfolio_vol_obs_in_year.setter
    def portfolio_vol_obs_in_year(self, portfolio_vol_obs_in_year):
        self.__portfolio_vol_obs_in_year = portfolio_vol_obs_in_year

    ##### properties for signal level vol adjustment
    @property
    def signal_vol_adjust(self):
        return self.__signal_vol_adjust

    @signal_vol_adjust.setter
    def signal_vol_adjust(self, signal_vol_adjust):
        self.__signal_vol_adjust = signal_vol_adjust

    @property
    def signal_vol_rebalance_freq(self):
        return self.__signal_vol_rebalance_freq

    @signal_vol_rebalance_freq.setter
    def signal_vol_rebalance_freq(self, signal_vol_rebalance_freq):
        self.__signal_vol_rebalance_freq = signal_vol_rebalance_freq

    @property
    def signal_vol_resample_type(self):
        return self.__signal_vol_resample_type

    @signal_vol_resample_type.setter
    def signal_vol_resample_type(self, signal_vol_resample_type):
        self.__signal_vol_resample_type = signal_vol_resample_type

    @property
    def signal_vol_resample_freq(self):
        return self.__signal_vol_resample_freq

    @signal_vol_resample_freq.setter
    def signal_vol_resample_freq(self, signal_vol_resample_freq):
        self.__signal_vol_resample_freq = signal_vol_resample_freq

    @property
    def signal_vol_period_shift(self):
        return self.__signal_vol_period_shift

    @signal_vol_period_shift.setter
    def signal_vol_period_shift(self, signal_vol_period_shift):
        self.__signal_vol_period_shift = signal_vol_period_shift

    @property
    def signal_vol_target(self):
        return self.__signal_vol_target

    @signal_vol_target.setter
    def signal_vol_target(self, signal_vol_target):
        self.__signal_vol_target = signal_vol_target

    @property
    def signal_vol_max_leverage(self):
        return self.__signal_vol_max_leverage

    @signal_vol_max_leverage.setter
    def signal_vol_max_leverage(self, signal_vol_max_leverage):
        self.__signal_vol_max_leverage = signal_vol_max_leverage

    @property
    def signal_vol_periods(self):
        return self.__signal_vol_periods

    @signal_vol_periods.setter
    def signal_vol_periods(self, signal_vol_periods):
        self.__signal_vol_periods = signal_vol_periods

    @property
    def signal_vol_obs_in_year(self):
        return self.__signal_vol_obs_in_year

    @signal_vol_obs_in_year.setter
    def signal_vol_obs_in_year(self, signal_vol_obs_in_year):
        self.__signal_vol_obs_in_year = signal_vol_obs_in_year

    ##### portfolio notional size
    @property
    def portfolio_notional_size(self):
        return self.__portfolio_notional_size

    @portfolio_notional_size.setter
    def portfolio_notional_size(self, portfolio_notional_size):
        self.__portfolio_notional_size = float(portfolio_notional_size)

    ##### portfolio weights (sum, mean or dictionary of weights)
    @property
    def portfolio_combination(self):
        return self.__portfolio_combination

    @portfolio_combination.setter
    def portfolio_combination(self, portfolio_combination):
        self.__portfolio_combination = portfolio_combination

    ##### properties for maximum position constraints
    @property
    def max_net_exposure(self):
        return self.__max_net_exposure

    @max_net_exposure.setter
    def max_net_exposure(self, max_net_exposure):
        self.__max_net_exposure = max_net_exposure

    @property
    def max_abs_exposure(self):
        return self.__max_abs_exposure

    @max_abs_exposure.setter
    def max_abs_exposure(self, max_abs_exposure):
        self.__max_abs_exposure = max_abs_exposure

    @property
    def position_clip_rebalance_freq(self):
        return self.__position_clip_rebalance_freq

    @position_clip_rebalance_freq.setter
    def position_clip_rebalance_freq(self, position_clip_rebalance_freq):
        self.__position_clip_rebalance_freq = position_clip_rebalance_freq

    @property
    def position_clip_resample_type(self):
        return self.__position_clip_resample_type

    @position_clip_resample_type.setter
    def position_clip_resample_type(self, position_clip_resample_type):
        self.__position_clip_resample_type = position_clip_resample_type

    @property
    def position_clip_resample_freq(self):
        return self.__position_clip_resample_freq

    @position_clip_resample_freq.setter
    def position_clip_resample_freq(self, position_clip_resample_freq):
        self.__position_clip_resample_freq = position_clip_resample_freq

    @property
    def position_clip_period_shift(self):
        return self.__position_clip_period_shift

    @position_clip_period_shift.setter
    def position_clip_period_shift(self, position_clip_period_shift):
        self.__position_clip_period_shift = position_clip_period_shift

    ##### stop loss and take profit
    @property
    def stop_loss(self):
        return self.__stop_loss

    @stop_loss.setter
    def stop_loss(self, stop_loss):
        self.__stop_loss = stop_loss

    @property
    def take_profit(self):
        return self.__take_profit

    @take_profit.setter
    def take_profit(self, take_profit):
        self.__take_profit = take_profit

    ##### tech indicators and spot bp tc
    @property
    def tech_params(self):
        return self.__tech_params

    @tech_params.setter
    def tech_params(self, tech_params):
        self.__tech_params = tech_params

    @property
    def spot_tc_bp(self):
        return self.__spot_tc_bp

    @spot_tc_bp.setter
    def spot_tc_bp(self, spot_tc_bp):
        self.__spot_tc_bp = spot_tc_bp / (2.0 * 100.0 * 100.0)

    #### FOR FUTURE USE ###

    @property
    def signal_name(self):
        return self.__signal_name

    @signal_name.setter
    def signal_name(self, signal_name):
        self.__signal_name = signal_name

    @property
    def asset(self):
        return self.__asset

    @asset.setter
    def asset(self, asset):
        valid_asset = ['fx', 'multi-asset']

        if asset not in valid_asset:
            self.logger.warning(asset + " is not a defined asset.")

        self.__asset = asset

    @property
    def instrument(self):
        return self.__instrument

    @instrument.setter
    def instrument(self, instrument):
        valid_instrument = ['spot', 'futures', 'options']

        if instrument not in valid_instrument:
            self.logger.warning(instrument
                                + " is not a defined trading instrument.")

        self.__instrument = instrument

    @property
    def signal_delay(self):
        return self.__signal_delay

    @signal_delay.setter
    def signal_delay(self, signal_delay):
        self.__signal_delay = signal_delay
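
Since BacktestRequest is mostly a bag of properties, configuring a backtest amounts to setting the relevant attributes. A short sketch using only properties defined above (the values are illustrative); note that the spot_tc_bp setter stores the half-spread as a decimal, so 2.5 bps becomes 2.5 / (2 * 100 * 100) = 0.000125 internally:

br = BacktestRequest()

# dates and transaction costs
br.start_date = "01 Jan 2010"
br.finish_date = "01 Jan 2020"
br.spot_tc_bp = 2.5            # stored internally as 0.000125 (half-spread, decimal)

# portfolio level vol targeting
br.portfolio_vol_adjust = True
br.portfolio_vol_target = 0.1          # 10% annualised vol target
br.portfolio_vol_periods = 20
br.portfolio_vol_obs_in_year = 252

# signal level vol targeting and signal delay
br.signal_vol_adjust = True
br.signal_vol_target = 0.1
br.signal_delay = 1

# risk management overlays
br.stop_loss = -0.05
br.take_profit = 0.05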
Code example #34
0
if run_example == 1 or run_example == 0:
    # for backtest and loading data
    from finmarketpy.backtest import BacktestRequest, Backtest
    from findatapy.market import Market, MarketDataRequest, MarketDataGenerator
    from findatapy.util.fxconv import FXConv

    # for logging
    from findatapy.util.loggermanager import LoggerManager

    # for signal generation
    from finmarketpy.economics import TechIndicator, TechParams

    # for plotting
    from chartpy import Chart, Style

    logger = LoggerManager().getLogger(__name__)

    import datetime

    backtest = Backtest()
    br = BacktestRequest()
    fxconv = FXConv()

    # get all asset data
    br.start_date = "02 Jan 1990"
    br.finish_date = datetime.datetime.utcnow()
    br.spot_tc_bp = 2.5  # 2.5 bps bid/ask spread
    br.ann_factor = 252

    # have vol target for each signal
    br.signal_vol_adjust = True
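
A typical continuation of the setup above defines the trend-following parameters and attaches them to the request. The sma_period attribute is an assumption here, following the finmarketpy trend-following examples; the other attributes are properties of the BacktestRequest class shown earlier:

    # signal parameters (sma_period is assumed, per the finmarketpy trend examples)
    tech_params = TechParams()
    tech_params.sma_period = 200          # simple moving average lookback in days

    br.tech_params = tech_params

    br.signal_vol_target = 0.05           # 5% vol target per signal
    br.signal_vol_max_leverage = 5
    br.signal_vol_periods = 20
    br.signal_vol_obs_in_year = 252
    br.signal_vol_rebalance_freq = 'BM'   # rebalance vol scaling at business month end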
Code example #35
0
File: fxconv.py Project: yashyennam/findatapy
        try:
            base_index = self.order.index(base)
        except ValueError:
            base_index = -1

        try:
            terms_index = self.order.index(terms)
        except ValueError:
            terms_index = -1

        if (base_index < 0 and terms_index > 0):
            return terms + base
        if (base_index > 0 and terms_index < 0):
            return base + terms
        elif (base_index > terms_index):
            return terms + base
        elif (terms_index > base_index):
            return base + terms

        return cross


if __name__ == '__main__':
    logger = LoggerManager.getLogger(__name__)

    fxconv = FXConv()

    if True:
        logger.info(fxconv.g10_crosses())
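
The snippet above decides the market-convention ordering of a currency pair by comparing each currency's position in a priority list: whichever currency appears earlier becomes the base. A standalone sketch of the same ordering rule (order_pair is a hypothetical helper and the priority list is abbreviated; this is not FXConv's actual API):

# abbreviated G10-style priority list (earlier = more likely to be the base currency)
ORDER = ['EUR', 'GBP', 'AUD', 'NZD', 'USD', 'CAD', 'CHF', 'NOK', 'SEK', 'JPY']

def order_pair(base, terms):
    """Return the pair in market-convention order, falling back to the given order."""
    try:
        base_index = ORDER.index(base)
    except ValueError:
        base_index = -1

    try:
        terms_index = ORDER.index(terms)
    except ValueError:
        terms_index = -1

    # the currency appearing earlier in the priority list quotes first
    if base_index < 0 and terms_index >= 0:
        return terms + base
    if terms_index < 0 and base_index >= 0:
        return base + terms
    if base_index > terms_index:
        return terms + base

    return base + terms

print(order_pair('USD', 'EUR'))   # EURUSD
print(order_pair('JPY', 'USD'))   # USDJPY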
Code example #36
0
File: ioengine.py Project: shashwat1e4/findatapy
class IOEngine(object):
    """Write and reads time series data to disk in various formats, CSV, HDF5 (fixed and table formats) and MongoDB/Arctic.

    Can be used to save down output of finmarketpy backtests and also to cache market data locally.

    Also supports BColz (but not currently stable). Planning to add other interfaces such as SQL etc.

    """

    def __init__(self):
        self.logger = LoggerManager().getLogger(__name__)

    ### functions to handle Excel on disk
    def write_time_series_to_excel(self, fname, sheet, data_frame, create_new=False):
        """Writes Pandas data frame to disk in Excel format

        Parameters
        ----------
        fname : str
            Excel filename to be written to
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        create_new : boolean
            to create a new Excel file
        """

        if create_new:
            writer = pandas.ExcelWriter(fname, engine='xlsxwriter')
        else:
            if os.path.isfile(fname):
                # appending to an existing workbook needs the openpyxl engine
                # (xlsxwriter cannot modify an existing file)
                book = load_workbook(fname)
                writer = pandas.ExcelWriter(fname, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pandas.ExcelWriter(fname, engine='xlsxwriter')

        data_frame.to_excel(writer, sheet_name=sheet, engine='xlsxwriter')

        writer.save()
        writer.close()

    def write_time_series_to_excel_writer(self, writer, sheet, data_frame):
        """Writes Pandas data frame to disk in Excel format for a writer

        Parameters
        ----------
        writer : ExcelWriter
            File handle to use for writing Excel file to disk
        sheet : str
            sheet in excel
        data_frame : DataFrame
            data frame to be written
        """
        data_frame.to_excel(writer, sheet, engine='xlsxwriter')

    def read_excel_data_frame(self, f_name, excel_sheet, freq, cutoff=None, dateparse=None,
                              postfix='.close', intraday_tz='UTC'):
        """Reads Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each columns
        intraday_tz : str
            timezone of file if uses intraday data

        Returns
        -------
        DataFrame
        """

        return self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse,
                                        postfix=postfix, intraday_tz=intraday_tz, excel_sheet=excel_sheet)

    def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1', db_port='6379',
                                         timeout=10, username=None,
                                         password=None):

        if 'hdf5' in engine:
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            pass
        elif (engine == 'redis'):

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if (fname == 'flush_all_keys'):
                    r.flushall()
                else:
                    # allow deletion of keys by pattern matching

                    x = r.keys('*' + fname)

                    if len(x) > 0:
                        r.delete(*x)  # redis-py's delete takes keys as separate arguments

                    # r.delete(fname)

            except Exception as e:
                self.logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            store.delete_library(fname)

            c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy
            try:
                os.remove(h5_filename)
            except:
                pass

    ### functions to handle HDF5 on disk
    def write_time_series_cache_to_disk(self, fname, data_frame,
                                        engine='hdf5_fixed', append_data=False, db_server=DataConstants().db_server,
                                        db_port=DataConstants().db_port, username=None, password=None,
                                        filter_out_matching=None, timeout=10,
                                        use_cache_compression=DataConstants().use_cache_compression):
        """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this
            'hdf5_table' - use HDF5 table format, slower but can append to
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for arctic (default: '127.0.0.1')
        timeout : int
            Number of seconds to do timeout
        """

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif (engine == 'redis'):

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if data_frame is not None:
                    if isinstance(data_frame, pandas.DataFrame):
                        # msgpack/blosc is deprecated
                        # r.set(fname, data_frame.to_msgpack(compress='blosc'))

                        # now uses pyarrow
                        context = pa.default_serialization_context()

                        ser = context.serialize(data_frame).to_buffer()

                        if use_cache_compression:
                            comp = pa.compress(ser, codec='lz4', asbytes=True)
                            siz = len(ser)  # uncompressed size, needed later for decompression

                            r.set('comp_' + str(siz) + '_' + fname, comp)
                        else:
                            r.set(fname, ser.to_pybytes())

                    self.logger.info("Pushed " + fname + " to Redis")
                else:
                    self.logger.info("Object " + fname + " is empty, not pushed to Redis.")

            except Exception as e:
                self.logger.warning("Couldn't push " + fname + " to Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                self.logger.info("Created MongoDB library: " + fname)
            else:
                self.logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # problems with Arctic when writing timezone to disk sometimes, so strip
            data_frame = data_frame.copy().tz_localize(None)

            # can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            self.logger.info("Written MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is much slower than fixed format)
            # removes duplicated entries at the end
            if append_data:
                store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="zlib", complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                # get the last row which matches and remove everything after that (because the
                # append function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while (i > 0):
                    read_index = store.select('data', start=i, stop=nrows).index[0]

                    if (read_index <= last_point): break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data', value=data_frame, format=hdf5_format, append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp, format=hdf5_format, complib="zlib", complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

            self.logger.info("Written HDF5: " + fname)

        elif (engine == 'parquet'):
            if fname[-5:] != '.gzip':
                fname = fname + '.gzip'

            data_frame.to_parquet(fname, compression='gzip')

            self.logger.info("Written Parquet: " + fname)

    def get_h5_filename(self, fname):
        """Strips h5 off filename returning first portion of filename

        Parameters
        ----------
        fname : str
            h5 filename to strip

        Returns
        -------
        str
        """
        if fname[-3:] == '.h5':
            return fname

        return fname + ".h5"

    def get_bcolz_filename(self, fname):
        """Strips bcolz off filename returning first portion of filename

        Parameters
        ----------
        fname : str
            bcolz filename to strip

        Returns
        -------
        str
        """
        if fname[-6:] == '.bcolz':
            return fname

        return fname + ".bcolz"

    def write_r_compatible_hdf_dataframe(self, data_frame, fname, fields=None):
        """Write a DataFrame to disk in as an R compatible HDF5 file.

        Parameters
        ----------
        data_frame : DataFrame
            data frame to be written
        fname : str
            file path to be written
        fields : list(str)
            columns to be written
        """
        fname_r = self.get_h5_filename(fname)

        self.logger.info("About to dump R binary HDF5 - " + fname_r)
        data_frame32 = data_frame.astype('float32')

        if fields is None:
            fields = data_frame32.columns.values

        # decompose date/time into individual fields (easier to pick up in R)
        data_frame32['Year'] = data_frame.index.year
        data_frame32['Month'] = data_frame.index.month
        data_frame32['Day'] = data_frame.index.day
        data_frame32['Hour'] = data_frame.index.hour
        data_frame32['Minute'] = data_frame.index.minute
        data_frame32['Second'] = data_frame.index.second
        data_frame32['Millisecond'] = data_frame.index.microsecond / 1000

        data_frame32 = data_frame32[
            ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second', 'Millisecond'] + fields]

        cols = data_frame32.columns

        store_export = pandas.HDFStore(fname_r)
        store_export.put('df_for_r', data_frame32, data_columns=cols)
        store_export.close()

    def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None,
                                         db_server=DataConstants().db_server,
                                         db_port=DataConstants().db_port, username=None, password=None):
        """Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hdf5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
        start_date : str/datetime (optional)
            Start date
        finish_date : str/datetime (optional)
            Finish date
        db_server : str
            IP address of MongoDB (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager.getLogger(__name__)

        data_frame_list = []

        if not isinstance(fname, list):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            if (engine == 'bcolz'):
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif (engine == 'redis'):
                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    # for pyarrow
                    context = pa.default_serialization_context()

                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)

                    # is there a compressed key stored?
                    k = r.keys('comp_*_' + fname_single)

                    # if so, it has been stored as a compressed object; if there is more than
                    # one matching key, take the last (the latest to be added)
                    if (len(k) >= 1):
                        k = k[-1].decode('utf-8')

                        comp = r.get(k)

                        siz = int(k.split('_')[1])
                        dec = pa.decompress(comp, codec='lz4', decompressed_size=siz)

                        msg = context.deserialize(dec)
                    else:
                        msg = r.get(fname_single)

                        # print(fname_single)
                        if msg is not None:
                            msg = context.deserialize(msg)
                            # self.logger.warning("Key " + fname_single + " not in Redis cache?")

                except Exception as e:
                    self.logger.info("Cache not existent for " + fname_single + " in Redis: " + str(e))

                if msg is None:
                    data_frame = None
                else:
                    self.logger.info('Load Redis cache: ' + fname_single)

                    data_frame = msg # pandas.read_msgpack(msg)

            elif (engine == 'arctic'):
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                self.logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                        connect=False)  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

                store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)

                    else:
                        from arctic.date import DateRange
                        item = library.read(fname_single, date_range=DateRange(start_date.replace(tzinfo=None), finish_date.replace(tzinfo=None)))

                    c.close()

                    self.logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    self.logger.warning('Library may not exist or another error: ' + fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if ('intraday' in fname_single):
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list

    ### functions for CSV reading and writing
    def write_time_series_to_csv(self, csv_path, data_frame):
        data_frame.to_csv(csv_path)

    def read_csv_data_frame(self, f_name, freq, cutoff=None, dateparse=None,
                            postfix='.close', intraday_tz='UTC', excel_sheet=None):
        """Reads CSV/Excel from disk into DataFrame

        Parameters
        ----------
        f_name : str
            CSV/Excel file path to read
        freq : str
            Frequency of data to read (intraday/daily etc)
        cutoff : DateTime (optional)
            end date to read up to
        dateparse : str (optional)
            date parser to use
        postfix : str (optional)
            postfix to add to each columns
        intraday_tz : str (optional)
            timezone of file if uses intraday data
        excel_sheet : str (optional)
            Excel sheet to be read

        Returns
        -------
        DataFrame
        """

        if (freq == 'intraday'):

            if dateparse is None:
                dateparse = lambda x: datetime.datetime(*map(int, [x[6:10], x[3:5], x[0:2],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'dukascopy':
                dateparse = lambda x: datetime.datetime(*map(int, [x[0:4], x[5:7], x[8:10],
                                                                   x[11:13], x[14:16], x[17:19]]))
            elif dateparse == 'c':
                # use C library for parsing dates, several hundred times quicker
                # requires compilation of library to install
                import ciso8601
                dateparse = lambda x: ciso8601.parse_datetime(x)

            if excel_sheet is None:
                data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=True, date_parser=dateparse)
            else:
                data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

            data_frame = data_frame.astype('float32')
            data_frame.index.names = ['Date']

            old_cols = data_frame.columns
            new_cols = []

            # add '.close' to each column name
            for col in old_cols:
                new_cols.append(col + postfix)

            data_frame.columns = new_cols
        else:
            # daily data
            if 'events' in f_name:

                data_frame = pandas.read_csv(f_name)

                # very slow conversion (note: convert_objects was removed in newer pandas
                # versions; pandas.to_datetime with errors='coerce' is the per-column equivalent)
                data_frame = data_frame.convert_objects(convert_dates='coerce')

            else:
                if excel_sheet is None:
                    try:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["DATE"], date_parser=dateparse)
                    except:
                        data_frame = pandas.read_csv(f_name, index_col=0, parse_dates=["Date"], date_parser=dateparse)
                else:
                    data_frame = pandas.read_excel(f_name, excel_sheet, index_col=0, na_values=['NA'])

        # convert Date to Python datetime
        # datetime data_frame['Date1'] = data_frame.index

        # slower method: lambda x: pandas.datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
        # data_frame['Date1'].apply(lambda x: datetime.datetime(int(x[6:10]), int(x[3:5]), int(x[0:2]),
        #                                        int(x[12:13]), int(x[15:16]), int(x[18:19])))

        # data_frame.index = data_frame['Date1']
        # data_frame.drop('Date1')

        # slower method: data_frame.index = pandas.to_datetime(data_frame.index)

        if (freq == 'intraday'):
            # assume time series are already in UTC and assign this (can specify other time zones)
            data_frame = data_frame.tz_localize(intraday_tz)

        # end cutoff date
        if cutoff is not None:
            if (isinstance(cutoff, str)):
                cutoff = parse(cutoff)

            data_frame = data_frame.loc[data_frame.index < cutoff]

        return data_frame

    def find_replace_chars(self, array, to_find, replace_with):

        for i in range(0, len(to_find)):
            array = [x.replace(to_find[i], replace_with[i]) for x in array]

        return array

    def convert_csv_data_frame(self, f_name, category, freq, cutoff=None, dateparse=None):
        """Converts CSV file to HDF5 file

        Parameters
        ----------
        f_name : str
            File name to be read
        category : str
            data category of file (used in HDF5 filename)
        freq : str
            intraday/daily frequency (used in HDF5 filename)
        cutoff : DateTime (optional)
            filter dates up to here
        dateparse : str
            date parser to use
        """

        self.logger.info("About to read... " + f_name)

        data_frame = self.read_csv_data_frame(f_name, freq, cutoff=cutoff, dateparse=dateparse)

        category_f_name = self.create_cache_file_name(category)

        self.write_time_series_cache_to_disk(category_f_name, data_frame)

    def clean_csv_file(self, f_name):
        """Cleans up CSV file (removing empty characters) before writing back to disk

        Parameters
        ----------
        f_name : str
            CSV file to be cleaned
        """

        with codecs.open(f_name, 'rb', 'utf-8') as myfile:
            data = myfile.read()

            # clean file first if dirty
            if data.count('\x00'):
                self.logger.info('Cleaning CSV...')

                with codecs.open(f_name + '.tmp', 'w', 'utf-8') as of:
                    of.write(data.replace('\x00', ''))

                shutil.move(f_name + '.tmp', f_name)

    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename

    # TODO refactor IOEngine so that each database is implemented in a subclass of DBEngine

    def get_engine(self, engine='hdf5_fixed'):
        pass
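
The Redis branch of write_time_series_cache_to_disk serialises the DataFrame with pyarrow and optionally LZ4-compresses it, embedding the uncompressed size in the key ('comp_<size>_<fname>') so the reader can decompress it later. The round trip, stripped of the Redis calls, looks like the sketch below (note that default_serialization_context is deprecated in recent pyarrow releases):

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({'close': [1.10, 1.11, 1.12]},
                  index=pd.date_range('2020-01-01', periods=3))

context = pa.default_serialization_context()

# serialise and compress (this is what gets pushed to Redis)
ser = context.serialize(df).to_buffer()
comp = pa.compress(ser, codec='lz4', asbytes=True)
siz = len(ser)                      # uncompressed size, needed for decompression

# ... later, after reading 'comp' and 'siz' back from the cache ...
dec = context.deserialize(pa.decompress(comp, codec='lz4', decompressed_size=siz))

assert df.equals(dec)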
Code example #37
0
File: configmanager.py Project: zmaenpaa/findatapy
            source + '.' + sourceticker]

    @staticmethod
    def convert_vendor_to_library_field(source, sourcefield):
        return ConfigManager._dict_time_series_fields_list_vendor_to_library[
            source + '.' + sourcefield]

    @staticmethod
    def convert_library_to_vendor_field(source, field):
        return ConfigManager._dict_time_series_fields_list_library_to_vendor[
            source + '.' + field]


## test function
if __name__ == '__main__':
    logger = LoggerManager().getLogger(__name__)

    categories = ConfigManager().get_categories_from_fields()

    logger.info("Categories from fields list")
    print(categories)

    categories = ConfigManager().get_categories_from_tickers()

    logger.info("Categories from tickers list")
    print(categories)

    filter = 'events'

    categories_filtered = ConfigManager(
    ).get_categories_from_tickers_selective_filter(filter)
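
The converters above all key their lookup dictionaries on 'source.name' strings, so a vendor mapping reduces to a flat dict lookup. A toy illustration of that key scheme (the dictionary contents and the standalone function are invented for illustration; the real mappings are built by ConfigManager from its configuration files):

# illustrative only: not ConfigManager's real data
_fields_vendor_to_library = {
    'bloomberg.PX_LAST': 'close',
    'bloomberg.PX_OPEN': 'open',
    'quandl.close': 'close',
}

def convert_vendor_to_library_field(source, sourcefield):
    # same 'source.field' key convention as ConfigManager
    return _fields_vendor_to_library[source + '.' + sourcefield]

print(convert_vendor_to_library_field('bloomberg', 'PX_LAST'))   # close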
Code example #38
0
if run_example == 1 or run_example == 0:
    # for backtest and loading data
    from finmarketpy.backtest import BacktestRequest, Backtest
    from findatapy.market import Market, MarketDataRequest, MarketDataGenerator
    from findatapy.util.fxconv import FXConv

    # for logging
    from findatapy.util.loggermanager import LoggerManager

    # for signal generation
    from finmarketpy.economics import TechIndicator, TechParams

    # for plotting
    from chartpy import Chart, Style

    logger = LoggerManager().getLogger(__name__)

    import datetime

    backtest = Backtest()
    br = BacktestRequest()
    fxconv = FXConv()

    # get all asset data
    br.start_date = "02 Jan 1990"
    br.finish_date = datetime.datetime.utcnow()
    br.spot_tc_bp = 2.5                             # 2.5 bps bid/ask spread
    br.ann_factor = 252

    # have vol target for each signal
    br.signal_vol_adjust = True