def _pandas_to_bucket(df, symbol, initial_image):
    """Serialise a pandas DataFrame of ticks into a single bucket document.

    Fix: removed a dead local (`image` — a dict comprehension whose result
    was never used; the stored image doc uses `initial_image` directly).

    Parameters
    ----------
    df : pandas.DataFrame
        Ticks indexed by timestamp, one column per field.
    symbol : str
        Identifier stored under the SYMBOL key.
    initial_image : dict or falsy
        Snapshot of field values at the start of the bucket; may contain
        an 'index' timestamp. Falsy means no image.

    Returns
    -------
    tuple
        (bucket_document, final_image) — the mongo-ready document and the
        image as of the end of the bucket.
    """
    rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(df)}
    end = to_dt(df.index[-1].to_pydatetime())
    if initial_image:
        # Bucket start is the earlier of the first tick and the image time.
        if 'index' in initial_image:
            start = min(to_dt(df.index[0].to_pydatetime()), initial_image['index'])
        else:
            start = to_dt(df.index[0].to_pydatetime())
        image_start = initial_image.get('index', start)
        rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        final_image = TickStore._pandas_compute_final_image(df, initial_image, end)
    else:
        start = to_dt(df.index[0].to_pydatetime())
        final_image = {}
    rtn[END] = end
    rtn[START] = start

    logger.warning("NB treating all values as 'exists' - no longer sparse")
    # Dense representation: every row is marked present for every column.
    rowmask = Binary(lz4_compressHC(np.packbits(np.ones(len(df), dtype='uint8')).tostring()))

    index_name = df.index.names[0] or "index"
    recs = df.to_records(convert_datetime64=False)
    for col in df:
        array = TickStore._ensure_supported_dtypes(recs[col])
        col_data = {}
        col_data[DATA] = Binary(lz4_compressHC(array.tostring()))
        col_data[ROWMASK] = rowmask
        col_data[DTYPE] = TickStore._str_dtype(array.dtype)
        rtn[COLUMNS][col] = col_data
    # Index is delta-encoded: first absolute ms timestamp, then successive diffs.
    rtn[INDEX] = Binary(lz4_compressHC(np.concatenate(
        ([recs[index_name][0].astype('datetime64[ms]').view('uint64')],
         np.diff(recs[index_name].astype('datetime64[ms]').view('uint64')))).tostring()))
    return rtn, final_image
def _to_bucket(ticks, symbol, initial_image):
    """Serialise a list of tick dicts into a single bucket document.

    ticks: list of dicts, each with an 'index' timestamp plus field values.
    symbol: identifier stored under the SYMBOL key.
    initial_image: dict snapshot at bucket start (may carry an 'index'
        timestamp), or falsy for no image.

    Returns (bucket_document, final_image), where final_image is the
    initial image updated with every tick seen in this bucket.
    """
    rtn = {
        SYMBOL: symbol,
        VERSION: CHUNK_VERSION_NUMBER,
        COLUMNS: {},
        COUNT: len(ticks)
    }
    data = {}
    rowmask = {}
    start = to_dt(ticks[0]['index'])
    end = to_dt(ticks[-1]['index'])
    final_image = copy.copy(initial_image) if initial_image else {}
    for i, t in enumerate(ticks):
        if initial_image:
            final_image.update(t)
        for k, v in iteritems(t):
            # EAFP: first occurrence of a field raises KeyError below and is
            # initialised in the except branch; later occurrences take the
            # try path. Statement order matters: for 'index', v is converted
            # to ms BEFORE data[k] is touched, so the except branch stores
            # the converted value on the very first tick too.
            try:
                if k != 'index':
                    rowmask[k][i] = 1  # mark field present in row i
                else:
                    v = TickStore._to_ms(v)
                    # Enforce monotonically non-decreasing timestamps.
                    if data[k][-1] > v:
                        raise UnorderedDataException(
                            "Timestamps out-of-order: %s > %s" % (
                                ms_to_datetime(data[k][-1]), t))
                data[k].append(v)
            except KeyError:
                if k != 'index':
                    # First sighting of this field: sparse presence mask
                    # over all ticks, with only row i set.
                    rowmask[k] = np.zeros(len(ticks), dtype='uint8')
                    rowmask[k][i] = 1
                data[k] = [v]
    # Pack each per-field presence mask into compressed bits.
    rowmask = dict([(k, Binary(lz4_compressHC(np.packbits(v).tostring())))
                    for k, v in iteritems(rowmask)])
    for k, v in iteritems(data):
        if k != 'index':
            v = np.array(v)
            v = TickStore._ensure_supported_dtypes(v)
            rtn[COLUMNS][k] = {
                DATA: Binary(lz4_compressHC(v.tostring())),
                DTYPE: TickStore._str_dtype(v.dtype),
                ROWMASK: rowmask[k]
            }
    if initial_image:
        image_start = initial_image.get('index', start)
        # The image must not post-date the first tick in the bucket.
        if image_start > start:
            raise UnorderedDataException(
                "Image timestamp is after first tick: %s > %s" % (
                    image_start, start))
        start = min(start, image_start)
        rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
    rtn[END] = end
    rtn[START] = start
    # Index is delta-encoded: first absolute ms timestamp, then diffs.
    rtn[INDEX] = Binary(
        lz4_compressHC(
            np.concatenate(
                ([data['index'][0]], np.diff(data['index']))).tostring()))
    return rtn, final_image
def _to_bucket(ticks, symbol, initial_image):
    """Build one bucket document from a list of tick dicts.

    Each tick dict carries an 'index' timestamp plus arbitrary field
    values. Produces compressed per-field data arrays with presence
    rowmasks, a delta-encoded index, and an optional image sub-document.

    Returns (bucket_document, final_image).
    """
    doc = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(ticks)}
    field_values = {}
    presence = {}
    start = to_dt(ticks[0]['index'])
    end = to_dt(ticks[-1]['index'])
    final_image = copy.copy(initial_image) if initial_image else {}

    for row, tick in enumerate(ticks):
        if initial_image:
            final_image.update(tick)
        for field, value in iteritems(tick):
            if field == 'index':
                # Timestamps are stored as milliseconds and must be
                # monotonically non-decreasing within the bucket.
                value = TickStore._to_ms(value)
                if field in field_values:
                    if field_values[field][-1] > value:
                        raise UnorderedDataException("Timestamps out-of-order: %s > %s" % (
                            ms_to_datetime(field_values[field][-1]), tick))
                    field_values[field].append(value)
                else:
                    field_values[field] = [value]
            elif field in presence:
                presence[field][row] = 1
                field_values[field].append(value)
            else:
                # First sighting of this field: allocate its presence mask.
                presence[field] = np.zeros(len(ticks), dtype='uint8')
                presence[field][row] = 1
                field_values[field] = [value]

    packed_masks = {field: Binary(lz4_compressHC(np.packbits(mask).tostring()))
                    for field, mask in iteritems(presence)}

    for field, values in iteritems(field_values):
        if field == 'index':
            continue
        arr = TickStore._ensure_supported_dtypes(np.array(values))
        doc[COLUMNS][field] = {DATA: Binary(lz4_compressHC(arr.tostring())),
                               DTYPE: TickStore._str_dtype(arr.dtype),
                               ROWMASK: packed_masks[field]}

    if initial_image:
        image_start = initial_image.get('index', start)
        # The image snapshot must not post-date the first tick.
        if image_start > start:
            raise UnorderedDataException("Image timestamp is after first tick: %s > %s" % (
                image_start, start))
        start = min(start, image_start)
        doc[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}

    doc[END] = end
    doc[START] = start
    # Index is delta-encoded: first absolute ms timestamp, then diffs.
    index_ms = field_values['index']
    doc[INDEX] = Binary(lz4_compressHC(
        np.concatenate(([index_ms[0]], np.diff(index_ms))).tostring()))
    return doc, final_image
def _pandas_to_bucket(df, symbol, initial_image):
    """Build one bucket document from a pandas DataFrame of ticks.

    df is indexed by timestamp with one column per field; symbol is the
    identifier stored under SYMBOL; initial_image is a dict snapshot at
    bucket start (optionally carrying an 'index' timestamp) or falsy.

    Returns (bucket_document, final_image).
    """
    doc = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(df)}
    first_tick = to_dt(df.index[0].to_pydatetime())
    end = to_dt(df.index[-1].to_pydatetime())
    if initial_image:
        # Bucket start is the earlier of the first tick and the image time.
        if 'index' in initial_image:
            start = min(first_tick, initial_image['index'])
        else:
            start = first_tick
        image_start = initial_image.get('index', start)
        doc[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        final_image = TickStore._pandas_compute_final_image(df, initial_image, end)
    else:
        start = first_tick
        final_image = {}
    doc[END] = end
    doc[START] = start

    logger.warning("NB treating all values as 'exists' - no longer sparse")
    # Dense representation: a single all-ones mask shared by every column.
    dense_mask = Binary(lz4_compressHC(np.packbits(np.ones(len(df), dtype='uint8')).tostring()))

    index_name = df.index.names[0] or "index"
    recs = df.to_records(convert_datetime64=False)
    for column in df:
        values = TickStore._ensure_supported_dtypes(recs[column])
        doc[COLUMNS][column] = {
            DATA: Binary(lz4_compressHC(values.tostring())),
            ROWMASK: dense_mask,
            DTYPE: TickStore._str_dtype(values.dtype),
        }
    # Index is delta-encoded: first absolute ms timestamp, then diffs.
    index_ms = recs[index_name].astype('datetime64[ms]').view('uint64')
    doc[INDEX] = Binary(lz4_compressHC(
        np.concatenate(([index_ms[0]], np.diff(index_ms))).tostring()))
    return doc, final_image
def compressHC(_str):
    """Compress ``_str`` using LZ4 high-compression (HC) mode."""
    result = lz4_compressHC(_str)
    return result
def compressHC(_str):
    """ HC compression """
    # Thin indirection over the lz4 high-compression entry point so callers
    # depend on this module's API rather than the lz4 binding directly.
    return lz4_compressHC(_str)