Esempio n. 1
0
    def _pandas_to_bucket(df, symbol, initial_image):
        """
        Build one bucket document from a DataFrame of ticks.

        Every column is written with the same all-ones row mask, i.e. each
        value is treated as present (see the warning logged below).

        df: DataFrame; its first and last index entries must provide
            ``to_pydatetime()`` (an empty frame is not handled here).
        symbol: value stored under the SYMBOL key.
        initial_image: optional dict; when truthy it is stored under
            IMAGE_DOC and its optional 'index' entry is used as the image time.

        Returns a ``(bucket, final_image)`` tuple. ``final_image`` comes from
        ``TickStore._pandas_compute_final_image`` when an initial image is
        given, otherwise it is an empty dict.
        """
        rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(df)}
        end = to_dt(df.index[-1].to_pydatetime())
        start = to_dt(df.index[0].to_pydatetime())
        if initial_image:
            # Without an explicit image timestamp the image is stamped with the first tick time.
            image_start = initial_image.get('index', start)
            # The bucket starts at whichever comes first: the image or the first tick.
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
            final_image = TickStore._pandas_compute_final_image(df, initial_image, end)
        else:
            final_image = {}
        rtn[END] = end
        rtn[START] = start

        logger.warning("NB treating all values as 'exists' - no longer sparse")
        # tobytes() returns the same bytes as the deprecated tostring() alias.
        rowmask = Binary(lz4_compressHC(np.packbits(np.ones(len(df), dtype='uint8')).tobytes()))

        index_name = df.index.names[0] or "index"
        recs = df.to_records(convert_datetime64=False)
        for col in df:
            array = TickStore._ensure_supported_dtypes(recs[col])
            rtn[COLUMNS][col] = {
                DATA: Binary(lz4_compressHC(array.tobytes())),
                ROWMASK: rowmask,
                DTYPE: TickStore._str_dtype(array.dtype),
            }

        # Index is stored as the first timestamp (ms, viewed as uint64) followed by successive differences.
        index_ms = recs[index_name].astype('datetime64[ms]').view('uint64')
        rtn[INDEX] = Binary(lz4_compressHC(np.concatenate(([index_ms[0]], np.diff(index_ms))).tobytes()))
        return rtn, final_image
Esempio n. 2
0
    def _to_bucket(ticks, symbol, initial_image):
        """
        Build one bucket document from a sequence of tick dicts.

        ticks: non-empty sequence of dicts mapping field name to value; the
            first and last ticks must carry an 'index' entry (timestamp).
        symbol: value stored under the SYMBOL key.
        initial_image: optional dict; when truthy it is stored under
            IMAGE_DOC and its optional 'index' entry is used as the image time.

        Returns a ``(bucket, final_image)`` tuple. ``final_image`` is a
        shallow copy of ``initial_image`` updated with every tick in order,
        or an empty dict when no initial image is given.

        Raises UnorderedDataException when a tick timestamp is lower than the
        previous one, or when the image timestamp is after the first tick.
        """
        rtn = {
            SYMBOL: symbol,
            VERSION: CHUNK_VERSION_NUMBER,
            COLUMNS: {},
            COUNT: len(ticks)
        }
        data = {}
        rowmask = {}
        start = to_dt(ticks[0]['index'])
        end = to_dt(ticks[-1]['index'])
        final_image = copy.copy(initial_image) if initial_image else {}
        for i, t in enumerate(ticks):
            if initial_image:
                final_image.update(t)
            for k, v in iteritems(t):
                if k == 'index':
                    v = TickStore._to_ms(v)
                    # Timestamps must be non-decreasing within a bucket.
                    if k in data and data[k][-1] > v:
                        raise UnorderedDataException(
                            "Timestamps out-of-order: %s > %s" %
                            (ms_to_datetime(data[k][-1]), t))
                else:
                    # Explicit membership test instead of a broad
                    # try/except KeyError, so a KeyError raised by a helper
                    # can no longer silently reset a column.
                    if k not in rowmask:
                        rowmask[k] = np.zeros(len(ticks), dtype='uint8')
                    # Mark row i as carrying a value for this column.
                    rowmask[k][i] = 1
                data.setdefault(k, []).append(v)

        # tobytes() returns the same bytes as the deprecated tostring() alias.
        rowmask = {k: Binary(lz4_compressHC(np.packbits(v).tobytes()))
                   for k, v in iteritems(rowmask)}
        for k, v in iteritems(data):
            if k != 'index':
                v = np.array(v)
                v = TickStore._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {
                    DATA: Binary(lz4_compressHC(v.tobytes())),
                    DTYPE: TickStore._str_dtype(v.dtype),
                    ROWMASK: rowmask[k]
                }

        if initial_image:
            # Without an explicit image timestamp the image is stamped with the first tick time.
            image_start = initial_image.get('index', start)
            if image_start > start:
                raise UnorderedDataException(
                    "Image timestamp is after first tick: %s > %s" %
                    (image_start, start))
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        rtn[END] = end
        rtn[START] = start
        # Index is stored as the first timestamp followed by successive differences.
        rtn[INDEX] = Binary(
            lz4_compressHC(
                np.concatenate(
                    ([data['index'][0]], np.diff(data['index']))).tobytes()))
        return rtn, final_image
Esempio n. 3
0
    def _to_bucket(ticks, symbol, initial_image):
        """
        Convert a sequence of tick dicts into a single bucket document.

        ticks: non-empty sequence of dicts (field name -> value); the first
            and last ticks must contain an 'index' timestamp entry.
        symbol: value stored under the SYMBOL key.
        initial_image: optional dict; when truthy it is recorded under
            IMAGE_DOC, and its 'index' entry (if any) is the image time.

        Returns ``(bucket, final_image)``. ``final_image`` is a shallow copy
        of ``initial_image`` with every tick applied on top in order, or an
        empty dict when there is no initial image.

        Raises UnorderedDataException if tick timestamps go backwards or the
        image timestamp is later than the first tick.
        """
        rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(ticks)}
        data = {}
        rowmask = {}
        start = to_dt(ticks[0]['index'])
        end = to_dt(ticks[-1]['index'])
        final_image = copy.copy(initial_image) if initial_image else {}
        for i, t in enumerate(ticks):
            if initial_image:
                final_image.update(t)
            for k, v in iteritems(t):
                if k != 'index':
                    # First sighting of a column: allocate its row mask.
                    # (Explicit check rather than catching KeyError, so an
                    # unrelated KeyError cannot silently reset a column.)
                    if k not in rowmask:
                        rowmask[k] = np.zeros(len(ticks), dtype='uint8')
                    rowmask[k][i] = 1
                else:
                    v = TickStore._to_ms(v)
                    # Each timestamp must be >= the previous one.
                    if k in data and data[k][-1] > v:
                        raise UnorderedDataException("Timestamps out-of-order: %s > %s" % (
                            ms_to_datetime(data[k][-1]), t))
                data.setdefault(k, []).append(v)

        # tobytes() is the supported spelling of the deprecated tostring(); same bytes.
        rowmask = {k: Binary(lz4_compressHC(np.packbits(v).tobytes()))
                   for k, v in iteritems(rowmask)}
        for k, v in iteritems(data):
            if k != 'index':
                v = np.array(v)
                v = TickStore._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {DATA: Binary(lz4_compressHC(v.tobytes())),
                                   DTYPE: TickStore._str_dtype(v.dtype),
                                   ROWMASK: rowmask[k]}

        if initial_image:
            # The image takes the first tick time when it has no 'index' of its own.
            image_start = initial_image.get('index', start)
            if image_start > start:
                raise UnorderedDataException("Image timestamp is after first tick: %s > %s" % (
                    image_start, start))
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        rtn[END] = end
        rtn[START] = start
        # Delta-encode the index: first timestamp, then successive differences.
        index_values = data['index']
        rtn[INDEX] = Binary(lz4_compressHC(np.concatenate(([index_values[0]], np.diff(index_values))).tobytes()))
        return rtn, final_image
Esempio n. 4
0
    def _pandas_to_bucket(df, symbol, initial_image):
        """
        Convert a DataFrame of ticks into a single bucket document.

        All columns share one all-ones row mask, so every value is treated
        as present (the warning logged below states this).

        df: DataFrame; its first and last index entries must provide
            ``to_pydatetime()`` (an empty frame is not handled here).
        symbol: value stored under the SYMBOL key.
        initial_image: optional dict; when truthy it is recorded under
            IMAGE_DOC, and its 'index' entry (if any) is the image time.

        Returns ``(bucket, final_image)``. ``final_image`` is the result of
        ``TickStore._pandas_compute_final_image`` when an initial image is
        supplied, otherwise an empty dict.
        """
        rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(df)}
        end = to_dt(df.index[-1].to_pydatetime())
        start = to_dt(df.index[0].to_pydatetime())
        final_image = {}
        if initial_image:
            # The image takes the first tick time when it has no 'index' of its own.
            image_start = initial_image.get('index', start)
            # Bucket start is the earlier of the first tick and the image time.
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
            final_image = TickStore._pandas_compute_final_image(df, initial_image, end)
        rtn[END] = end
        rtn[START] = start

        logger.warning("NB treating all values as 'exists' - no longer sparse")
        # tobytes() is the supported spelling of the deprecated tostring(); same bytes.
        all_present = np.ones(len(df), dtype='uint8')
        rowmask = Binary(lz4_compressHC(np.packbits(all_present).tobytes()))

        index_name = df.index.names[0] or "index"
        recs = df.to_records(convert_datetime64=False)
        for col in df:
            array = TickStore._ensure_supported_dtypes(recs[col])
            col_data = {
                DATA: Binary(lz4_compressHC(array.tobytes())),
                ROWMASK: rowmask,
                DTYPE: TickStore._str_dtype(array.dtype),
            }
            rtn[COLUMNS][col] = col_data

        # Delta-encode the index: first timestamp in ms (viewed as uint64),
        # then successive differences. The conversion is done once and reused.
        index_ms = recs[index_name].astype('datetime64[ms]').view('uint64')
        deltas = np.concatenate(([index_ms[0]], np.diff(index_ms)))
        rtn[INDEX] = Binary(lz4_compressHC(deltas.tobytes()))
        return rtn, final_image
Esempio n. 5
0
def compressHC(_str):
    """
    HC compression of ``_str``.

    Thin wrapper: passes ``_str`` straight to ``lz4_compressHC`` and returns
    whatever that call produces.
    """
    compressed = lz4_compressHC(_str)
    return compressed
Esempio n. 6
0
def compressHC(_str):
    """
    Apply HC compression to ``_str``.

    Delegates directly to ``lz4_compressHC``; the argument is forwarded
    unchanged and the delegate's result is returned as-is.
    """
    result = lz4_compressHC(_str)
    return result