Code example #1
    def process_response_event(self, event):
        data_frame_list = []

        logger = LoggerManager().getLogger(__name__)

        for msg in event:
            # Generates a lot of output - so don't use unless for
            # debugging purposes
            # logger.info(msg)

            if msg.hasElement(self.RESPONSE_ERROR):
                logger.error("REQUEST FAILED: " +
                             str(msg.getElement(self.RESPONSE_ERROR)))
                continue

            data_frame_slice = self.process_message(msg)

            if (data_frame_slice is not None):
                data_frame_list.append(data_frame_slice)

        if data_frame_list == []:
            logger.warn("No elements for ticker.")
            return None
        else:
            return pd.concat(data_frame_list)
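
A handler like the one above is normally driven by an event loop that polls the blpapi session until the final RESPONSE event arrives (the event_loop call in code example #3). Below is a minimal sketch of such a loop; it assumes the standard blpapi nextEvent/Event.RESPONSE API, and the actual event_loop used by this code may differ.

    def simple_event_loop(self, session):
        # Hypothetical illustration: poll the session and hand each partial/final
        # response event to process_response_event, concatenating the slices
        import blpapi
        import pandas as pd

        data_frame_list = []

        while True:
            event = session.nextEvent(500)  # wait up to 500 ms for the next event

            if event.eventType() in (blpapi.Event.PARTIAL_RESPONSE,
                                     blpapi.Event.RESPONSE):
                data_frame_slice = self.process_response_event(event)

                if data_frame_slice is not None:
                    data_frame_list.append(data_frame_slice)

            # the RESPONSE event marks the end of the request
            if event.eventType() == blpapi.Event.RESPONSE:
                break

        return pd.concat(data_frame_list) if data_frame_list else None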
Code example #2
    def write_time_series_cache_to_disk(
            self,
            fname,
            data_frame,
            engine='hdf5_fixed',
            append_data=False,
            db_server=constants.db_server,
            db_port=constants.db_port,
            username=constants.db_username,
            password=constants.db_password,
            filter_out_matching=None,
            timeout=10,
            use_cache_compression=constants.use_cache_compression,
            parquet_compression=constants.parquet_compression,
            md_request=None,
            ticker=None):
        """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot be appended to
            'hdf5_table' - use HDF5 table format, slower but supports appending
            'bcolz' - use bcolz format
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
            'csv' - write a CSV file
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for arctic (default: '127.0.0.1')
        timeout : int
            Timeout in seconds for database connections
        """

        logger = LoggerManager().getLogger(__name__)

        if md_request is not None:
            fname = self.path_join(
                fname, md_request.create_category_key(ticker=ticker))

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert characters which bcolz can't deal with to substitutes
            data_frame.columns = self.find_replace_chars(
                data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index,
                                                    unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif (engine == 'redis'):

            fname = os.path.basename(fname).replace('.', '_')

            # Will fail if Redis is not running or reachable
            try:
                r = redis.StrictRedis(host=db_server,
                                      port=db_port,
                                      db=0,
                                      socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                ping = r.ping()

                # If Redis is alive, try pushing to it
                if ping:
                    if data_frame is not None:
                        if isinstance(data_frame, pandas.DataFrame):
                            mem = data_frame.memory_usage(deep=True).sum()
                            mem_float = round(
                                float(mem) / (1024.0 * 1024.0), 3)

                            # only push to Redis if the frame is under roughly 500 MB
                            if mem_float < 500:
                                # msgpack/blosc is deprecated
                                # r.set(fname, data_frame.to_msgpack(compress='blosc'))

                                # now uses pyarrow
                                context = pa.default_serialization_context()

                                ser = context.serialize(data_frame).to_buffer()

                                if use_cache_compression:
                                    comp = pa.compress(ser,
                                                       codec='lz4',
                                                       asbytes=True)
                                    siz = len(ser)  # uncompressed size in bytes (stored in the Redis key)

                                    r.set('comp_' + str(siz) + '_' + fname,
                                          comp)
                                else:
                                    r.set(fname, ser.to_pybytes())

                                logger.info("Pushed " + fname + " to Redis")
                            else:
                                logger.warn("Did not push " + fname +
                                            " to Redis, given size")
                    else:
                        logger.info("Object " + fname +
                                    " is empty, not pushed to Redis.")
                else:
                    logger.warning("Didn't push " + fname +
                                   " to Redis given not running")

            except Exception as e:
                logger.warning("Couldn't push " + fname + " to Redis: " +
                               str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" +
                    str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) +
                                        ":" + str(db_port),
                                        connect=False)

            store = Arctic(c,
                           socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                logger.info("Created MongoDB library: " + fname)
            else:
                logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # Arctic sometimes has problems writing timezone-aware indices to disk, so strip the timezone
            data_frame = data_frame.copy().tz_localize(None)

            try:
                # Can duplicate values if we have existing dates
                if append_data:
                    library.append(fname, data_frame)
                else:
                    library.write(fname, data_frame)

                c.close()
                logger.info("Written MongoDB library: " + fname)
            except Exception as e:
                logger.warning("Couldn't write MongoDB library: " + fname +
                               " " + str(e))

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # appending only works for HDF5 stored as tables (but this is much slower than fixed format);
            # duplicated entries at the end are removed first
            if append_data:
                store = pandas.HDFStore(h5_filename,
                                        format=hdf5_format,
                                        complib="zlib",
                                        complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                # get the last row which matches and remove everything after that (because the append
                # function doesn't check for duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while (i > 0):
                    read_index = store.select('data', start=i,
                                              stop=nrows).index[0]

                    if (read_index <= last_point): break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data',
                          value=data_frame,
                          format=hdf5_format,
                          append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp,
                                        complib="zlib",
                                        complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

            logger.info("Written HDF5: " + fname)

        elif (engine == 'parquet'):
            if '.parquet' not in fname:
                if fname[-5:] != '.gzip':
                    fname = fname + '.parquet'

            self.to_parquet(data_frame,
                            fname,
                            aws_region=constants.aws_region,
                            parquet_compression=parquet_compression)
            # data_frame.to_parquet(fname, compression=parquet_compression)

            logger.info("Written Parquet: " + fname)
        elif engine == 'csv':
            if '.csv' not in fname:
                fname = fname + '.csv'

            data_frame.to_csv(fname)

            logger.info("Written CSV: " + fname)
Code example #3
    def load_time_series(self, market_data_request):

        # if(BBGLowLevelTemplate._session is None):
        logger = LoggerManager().getLogger(__name__)

        session = self.start_bloomberg_session()
        # else:
        #    session = BBGLowLevelTemplate._session

        try:
            # if we can't open the session, kill the existing one,
            # then try to reopen (up to 5 times)
            i = 0

            while i < 5:
                if session is not None:
                    if not session.openService("//blp/refdata"):
                        logger.info("Try reopening Bloomberg session... try " +
                                    str(i))
                        self.kill_session(
                            session
                        )  # need to forcibly kill_session since can't always reopen
                        session = self.start_bloomberg_session()

                        if session is not None:
                            if session.openService("//blp/refdata"): i = 6
                else:
                    logger.info("Try opening Bloomberg session... try " +
                                str(i))
                    session = self.start_bloomberg_session()

                i = i + 1

            # give an error if it still doesn't work after several tries
            if not session.openService("//blp/refdata"):
                logger.error("Failed to open //blp/refdata")

                return

            logger.info("Creating request...")

            eventQueue = blpapi.EventQueue()
            # eventQueue = None

            # create a request
            from blpapi import CorrelationId
            cid = CorrelationId()
            options = self.fill_options(market_data_request)

            if options.security is not None:
                self.send_bar_request(session, eventQueue, options, cid)

                logger.info("Waiting for data to be returned...")

                data_frame = self.event_loop(session)
            else:
                logger.warn("No ticker or field specified!")

                data_frame = None
        finally:
            # stop the session (will fail if session is None)
            try:
                session.stop()
            except:
                pass

        return data_frame
Code example #4
class BBGLowLevelDaily(BBGLowLevelTemplate):
    def __init__(self):
        super(BBGLowLevelDaily, self).__init__()

        self.logger = LoggerManager().getLogger(__name__)

    def combine_slices(self, data_frame_cols, data_frame_slice):
        # only return this slice if its ticker is not already among the existing columns
        try:
            if (data_frame_slice.columns.get_level_values(1).values[0]
                    not in data_frame_cols):
                # return data_frame.join(data_frame_slice, how="outer")
                return data_frame_slice
        except Exception as e:
            self.logger.warn('Data slice empty ' + str(e))

            return None

        return None

    # populate options for a Bloomberg daily (historical) data request
    def fill_options(self, market_data_request):
        options = OptionsBBG()

        options.security = market_data_request.tickers
        options.startDateTime = market_data_request.start_date
        options.endDateTime = market_data_request.finish_date
        options.fields = market_data_request.fields

        options.overrides = market_data_request.overrides

        return options

    def process_message(self, msg):
        # Process received events

        # SLOW loop (careful: not all fields are returned every time, hence the need to include the field name in the tuple)
        # perhaps try to run in parallel?

        implementation = 'simple'

        if implementation == 'simple':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            data = defaultdict(dict)

            # FASTER: avoid repeated getValue/getElement calls in blpapi (they are very slow); better to cache variables
            for i in range(fieldData.numValues()):
                mini_field_data = fieldData.getValue(i)
                date = mini_field_data.getElement(0).getValue()

                for j in range(1, mini_field_data.numElements()):
                    field_value = mini_field_data.getElement(j)

                    data[(str(field_value.name()),
                          ticker)][date] = field_value.getValue()

            # ORIGINAL repeated calling getValue/getElement much slower
            # for i in range(fieldData.numValues()):
            #     for j in range(1, fieldData.getValue(i).numElements()):
            #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
            #             = fieldData.getValue(i).getElement(j).getValue()
        elif implementation == 'py4j':
            pass

            # TODO Py4J
            # from findatapy.market.bbgloop import bbgloop
            # from py4j.java_gateway import JavaGateway

            # gateway = JavaGateway()
            # data = gateway.entry_point.parseFieldDataArray(msg)
        elif implementation == 'cython':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop import bbgloop

            data = bbgloop(fieldData, ticker)
        elif implementation == 'numba':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop_numba import bbgloop_numba

            data = bbgloop_numba(fieldData, ticker)

        data_frame = pandas.DataFrame(data)

        # an obsolete ticker can return no values
        if not data_frame.empty:
            # data_frame.columns = pandas.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pandas.to_datetime(data_frame.index)
            self.logger.info("Read: " + ticker + ' ' +
                             str(data_frame.index[0]) + ' - ' +
                             str(data_frame.index[-1]))
        else:
            return None

        return data_frame

    # create request for data
    def send_bar_request(self, session, eventQueue, options, cid):
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")

        request.set("startDate", options.startDateTime.strftime('%Y%m%d'))
        request.set("endDate", options.endDateTime.strftime('%Y%m%d'))

        # append the requested fields and securities to the request
        for field in options.fields:
            request.getElement("fields").appendValue(field)

        for security in options.security:
            request.getElement("securities").appendValue(security)

        self.logger.info("Sending Bloomberg Daily Request:" + str(request))
        session.sendRequest(request=request, correlationId=cid)
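
End to end, a daily download with the class above might look like the sketch below. The md_request object is a hypothetical stand-in exposing the attributes read by fill_options (tickers, fields, start_date, finish_date, overrides); the real findatapy request object may differ, and a running Bloomberg Terminal/API is required.

    import datetime
    from types import SimpleNamespace

    # hypothetical stand-in for the market_data_request object consumed by fill_options
    md_request = SimpleNamespace(tickers=['EURUSD Curncy'],
                                 fields=['PX_LAST'],
                                 start_date=datetime.datetime(2021, 1, 1),
                                 finish_date=datetime.datetime(2021, 12, 31),
                                 overrides={})

    downloader = BBGLowLevelDaily()

    # load_time_series is inherited from BBGLowLevelTemplate (code example #3)
    df = downloader.load_time_series(md_request)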