Example #1
def get_coldata(coldata):
    """ return values and rowmask """
    dtype = np.dtype(coldata[DTYPE])
    values = np.frombuffer(decompress(coldata[DATA]), dtype=dtype)
    rowmask = np.unpackbits(
        np.frombuffer(decompress(coldata[ROWMASK]), dtype='uint8'))
    return list(values), list(rowmask)
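get_coldata relies on numpy's bit packing: a row mask costs one bit per row, rounded up to a whole byte, which is why the tests further down compare against masks padded out to eight entries. A minimal round trip of that packing, with made-up flags:

import numpy as np

present = np.array([1, 1, 0, 0, 1, 0, 0, 0, 0, 0], dtype='uint8')  # values in rows 0, 1 and 4

packed = np.packbits(present)          # 2 bytes cover 10 rows (padded to 16 bits)
restored = np.unpackbits(packed)[:10]  # trim the 6 padding bits

assert list(restored) == list(present)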
Example #2
    def _read_bucket(self, doc, column_set, column_dtypes, include_symbol,
                     include_images, columns):
        rtn = {}
        if doc[VERSION] != 3:
            raise ArcticException("Unhandled document version: %s" %
                                  doc[VERSION])
        # np.cumsum copies the read-only array created with frombuffer
        rtn[INDEX] = np.cumsum(
            np.frombuffer(decompress(doc[INDEX]), dtype='uint64'))
        doc_length = len(rtn[INDEX])
        column_set.update(doc[COLUMNS].keys())

        # get the mask for the columns we're about to load
        union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                # the or below will make a copy of this read-only array
                mask = np.frombuffer(decompress(coldata[ROWMASK]),
                                     dtype='uint8')
                union_mask = union_mask | mask
            except KeyError:
                rtn[c] = None
        union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
        rtn_length = np.sum(union_mask)

        rtn[INDEX] = rtn[INDEX][union_mask]
        if include_symbol:
            rtn['SYMBOL'] = [doc[SYMBOL]] * rtn_length

        # Unpack each requested column in turn
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                dtype = np.dtype(coldata[DTYPE])
                # values ends up being copied by pandas before being returned to the user. However, we
                # copy it into a bytearray here for safety.
                values = np.frombuffer(bytearray(decompress(coldata[DATA])),
                                       dtype=dtype)
                self._set_or_promote_dtype(column_dtypes, c, dtype)
                rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
                # unpackbits will make a copy of the read-only array created by frombuffer
                rowmask = np.unpackbits(
                    np.frombuffer(decompress(coldata[ROWMASK]),
                                  dtype='uint8'))[:doc_length].astype('bool')
                rowmask = rowmask[union_mask]
                rtn[c][rowmask] = values
            except KeyError:
                rtn[c] = None

        if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
            rtn = self._prepend_image(rtn, doc[IMAGE_DOC], rtn_length,
                                      column_dtypes, column_set, columns)
        return rtn
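The reader above ORs the packed per-column masks byte-wise and calls np.unpackbits only once, which is cheaper than unpacking every column's mask separately. A self-contained sketch of that step, with made-up masks for two columns:

import numpy as np

doc_length = 10
col_a = np.packbits(np.array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype='uint8'))
col_b = np.packbits(np.array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0], dtype='uint8'))

# OR the packed bytes first, then unpack once and trim the padding bits.
union = np.zeros((doc_length + 7) // 8, dtype='uint8')
for mask in (col_a, col_b):
    union = union | mask
union_mask = np.unpackbits(union)[:doc_length].astype('bool')

assert list(union_mask.astype('uint8')) == [1, 1, 0, 0, 1, 0, 0, 0, 0, 0]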
Example #3
def test_exceptions():
    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    assert("decompressor wrote" in str(e.value).lower() or "corrupt input at" in str(e.value).lower() or "decompression failed: corrupt input" in str(e.value).lower())

    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("decompressor wrote" in str(e.value).lower() or "corrupt input at" in str(e.value).lower() or "decompression failed: corrupt input" in str(e.value).lower())
Example #4
    def _read_bucket(self, doc, column_set, column_dtypes, include_symbol,
                     include_images, columns):
        rtn = {}
        if doc[VERSION] != 3:
            raise ArcticException("Unhandled document version: %s" %
                                  doc[VERSION])
        rtn[INDEX] = np.cumsum(
            np.frombuffer(decompress(doc[INDEX]), dtype='uint64'))
        doc_length = len(rtn[INDEX])
        column_set.update(doc[COLUMNS].keys())

        # get the mask for the columns we're about to load
        union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                mask = np.frombuffer(decompress(coldata[ROWMASK]),
                                     dtype='uint8')
                union_mask = union_mask | mask
            except KeyError:
                rtn[c] = None
        union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
        rtn_length = np.sum(union_mask)

        rtn[INDEX] = rtn[INDEX][union_mask]
        if include_symbol:
            rtn['SYMBOL'] = [doc[SYMBOL]] * rtn_length

        # Unpack each requested column in turn
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                dtype = np.dtype(coldata[DTYPE])
                values = np.frombuffer(decompress(coldata[DATA]), dtype=dtype)
                self._set_or_promote_dtype(column_dtypes, c, dtype)
                rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
                rowmask = np.unpackbits(
                    np.frombuffer(decompress(coldata[ROWMASK]),
                                  dtype='uint8'))[:doc_length].astype('bool')
                rowmask = rowmask[union_mask]
                rtn[c][rowmask] = values
            except KeyError:
                rtn[c] = None

        if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
            rtn = self._prepend_image(rtn, doc[IMAGE_DOC], rtn_length,
                                      column_dtypes, column_set, columns)
        return rtn
Example #5
def test_tickstore_pandas_to_bucket_image():
    symbol = 'SYM'
    tz = 'UTC'
    initial_image = {'index': dt(2014, 1, 1, 0, 0, tzinfo=mktz(tz)), 'A': 123, 'B': 54.4, 'C': 'DESC'}
    data = [{'A': 120, 'D': 1}, {'A': 122, 'B': 2.0}, {'A': 3, 'B': 3.0, 'D': 1}]
    tick_index = [dt(2014, 1, 2, 0, 0, tzinfo=mktz(tz)),
                  dt(2014, 1, 3, 0, 0, tzinfo=mktz(tz)),
                  dt(2014, 1, 4, 0, 0, tzinfo=mktz(tz))]
    data = pd.DataFrame(data, index=tick_index)
    bucket, final_image = TickStore._pandas_to_bucket(data, symbol, initial_image)
    assert final_image == {'index': dt(2014, 1, 4, 0, 0, tzinfo=mktz(tz)), 'A': 3, 'B': 3.0, 'C': 'DESC', 'D': 1}
    assert IMAGE_DOC in bucket
    assert bucket[COUNT] == 3
    assert bucket[START] == dt(2014, 1, 1, 0, 0, tzinfo=mktz(tz))
    assert bucket[END] == dt(2014, 1, 4, 0, 0, tzinfo=mktz(tz))
    assert set(bucket[COLUMNS]) == set(('A', 'B', 'D'))
    assert set(bucket[COLUMNS]['A']) == set((ROWMASK, DTYPE, DATA))
    assert get_coldata(bucket[COLUMNS]['A']) == ([120, 122, 3], [1, 1, 1, 0, 0, 0, 0, 0])
    values, rowmask = get_coldata(bucket[COLUMNS]['B'])
    assert np.isnan(values[0]) and values[1:] == [2.0, 3.0]
    assert rowmask == [1, 1, 1, 0, 0, 0, 0, 0]
    values, rowmask = get_coldata(bucket[COLUMNS]['D'])
    assert np.isnan(values[1])
    assert values[0] == 1 and values[2] == 1
    assert rowmask == [1, 1, 1, 0, 0, 0, 0, 0]
    index = [dt.fromtimestamp(int(i/1000)).replace(tzinfo=mktz(tz)) for i in
             list(np.cumsum(np.frombuffer(decompress(bucket[INDEX]), dtype='uint64')))]
    assert index == tick_index
    assert bucket[COLUMNS]['A'][DTYPE] == 'int64'
    assert bucket[COLUMNS]['B'][DTYPE] == 'float64'
    assert bucket[SYMBOL] == symbol
    assert bucket[IMAGE_DOC] == {IMAGE: initial_image,
                                 IMAGE_TIME: initial_image['index']}
Example #6
def test_tickstore_to_bucket_with_image():
    symbol = 'SYM'
    tz = 'UTC'
    initial_image = {'index': dt(2014, 1, 1, 0, 0, tzinfo=mktz(tz)), 'A': 123, 'B': 54.4, 'C': 'DESC'}
    data = [{'index': dt(2014, 1, 1, 0, 1, tzinfo=mktz(tz)), 'A': 124, 'D': 0},
            {'index': dt(2014, 1, 1, 0, 2, tzinfo=mktz(tz)), 'A': 125, 'B': 27.2}]
    bucket, final_image = TickStore._to_bucket(data, symbol, initial_image)
    assert bucket[COUNT] == 2
    assert bucket[END] == dt(2014, 1, 1, 0, 2, tzinfo=mktz(tz))
    assert set(bucket[COLUMNS]) == set(('A', 'B', 'D'))
    assert set(bucket[COLUMNS]['A']) == set((ROWMASK, DTYPE, DATA))
    assert get_coldata(bucket[COLUMNS]['A']) == ([124, 125], [1, 1, 0, 0, 0, 0, 0, 0])
    assert get_coldata(bucket[COLUMNS]['B']) == ([27.2], [0, 1, 0, 0, 0, 0, 0, 0])
    assert get_coldata(bucket[COLUMNS]['D']) == ([0], [1, 0, 0, 0, 0, 0, 0, 0])
    index = [dt.fromtimestamp(int(i/1000)).replace(tzinfo=mktz(tz)) for i in
             list(np.cumsum(np.frombuffer(decompress(bucket[INDEX]), dtype='uint64')))]
    assert index == [i['index'] for i in data]
    assert bucket[COLUMNS]['A'][DTYPE] == 'int64'
    assert bucket[COLUMNS]['B'][DTYPE] == 'float64'
    assert bucket[SYMBOL] == symbol
    assert bucket[START] == initial_image['index']
    assert bucket[IMAGE_DOC][IMAGE] == initial_image
    assert bucket[IMAGE_DOC] == {IMAGE: initial_image,
                                 IMAGE_TIME: initial_image['index']}
    assert final_image == {'index': data[-1]['index'], 'A': 125, 'B': 27.2, 'C': 'DESC', 'D': 0}
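Both tickstore tests decode the bucket index the same way: the INDEX blob holds uint64 millisecond deltas, and np.cumsum turns them back into absolute epoch-millis. A minimal stand-alone sketch with hand-computed values (2014-01-01 00:01 UTC is 1388534460000 ms):

import numpy as np
from datetime import datetime, timedelta, timezone

deltas = np.array([1388534460000, 60000], dtype='uint64')  # two ticks, one minute apart
millis = np.cumsum(deltas)
index = [datetime.fromtimestamp(int(ms // 1000), tz=timezone.utc) for ms in millis]

assert index[0] == datetime(2014, 1, 1, 0, 1, tzinfo=timezone.utc)
assert index[1] - index[0] == timedelta(minutes=1)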
Example #7
    def _do_read(self,
                 backing_store,
                 library_name,
                 version,
                 symbol,
                 index_range=None):
        '''
        index_range is a 2-tuple of integers - a [from, to) range of segments to be read.
            Either from or to can be None, indicating no bound.
        '''
        from_index = index_range[0] if index_range else None
        to_index = version['up_to']
        if index_range and index_range[1] and index_range[1] < version['up_to']:
            to_index = index_range[1]

        segment_keys = version['segment_keys']
        filtered_segment_keys = []
        for i, segment_index in enumerate(version['raw_segment_index']):
            if (from_index is None or segment_index >= from_index) and \
                    (to_index is None or segment_index <= to_index):
                filtered_segment_keys.append(segment_keys[i])

        data = bytearray()
        for segment in backing_store.read_segments(library_name,
                                                   filtered_segment_keys):
            data.extend(decompress(segment))

        dtype = self._dtype(version['dtype'],
                            version.get('dtype_metadata', {}))
        rtn = np.frombuffer(data,
                            dtype=dtype).reshape(version.get('shape', (-1)))
        return rtn
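The key-filtering loop above can be read in isolation: keep the keys whose raw segment index falls inside the (possibly open-ended) window. filter_segment_keys below is a hypothetical standalone helper, not a library API, that mirrors the same logic:

# Hypothetical helper mirroring the filtering loop in _do_read above.
def filter_segment_keys(segment_keys, raw_segment_index, from_index, to_index):
    return [key for key, seg_idx in zip(segment_keys, raw_segment_index)
            if (from_index is None or seg_idx >= from_index)
            and (to_index is None or seg_idx <= to_index)]

assert filter_segment_keys(['k0', 'k1', 'k2'], [0, 10, 20], 5, None) == ['k1', 'k2']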
Example #8
def test_exceptions():
    data = c.compress(
        b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111'
    )
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    assert ("Decompressor wrote" in str(e) or "Corrupt input at" in str(e))

    data = c.compress(
        b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111'
    )
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("Decompressor wrote" in str(e) or "Corrupt input at" in str(e))
Example #9
def test_compress_decompress_no_parallel():
    with patch('arctic._compression.clz4', sentinel.clz4), \
         patch('arctic._compression.ENABLE_PARALLEL', False), \
         patch('arctic._compression.lz4', wraps=lz4) as patch_lz4:
        # patching clz4 with sentinel will make accessing any clz4 function explode
        assert decompress(compress(b'Foo')) == b'Foo'
        assert patch_lz4.compress.call_args_list == [call(b'Foo')]
        assert patch_lz4.decompress.call_args_list == [call(compress(b'Foo'))]
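As the inline comment says, mock's sentinel objects grow no attributes, so any code path that still reaches for clz4 fails immediately instead of silently taking the parallel path. A quick illustration:

from unittest.mock import sentinel

# Attribute access on a sentinel raises AttributeError, which is the point:
# the test would blow up if the patched-out clz4 module were exercised.
try:
    sentinel.clz4.compress
except AttributeError:
    print("sentinel has no 'compress', as intended")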
Example #10
def test_compress_decompress_no_parallel():
    with patch("arctic._compression.clz4", sentinel.clz4), patch("arctic._compression.ENABLE_PARALLEL", False), patch(
        "arctic._compression.lz4", wraps=lz4
    ) as patch_lz4:
        # patching clz4 with sentinel will make accessing any clz4 function explode
        assert decompress(compress("Foo")) == "Foo"
        assert patch_lz4.compress.call_args_list == [call("Foo")]
        assert patch_lz4.decompress.call_args_list == [call(compress("Foo"))]
Example #11
    def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_images, columns):
        rtn = {}
        if doc[VERSION] != 3:
            raise ArcticException("Unhandled document version: %s" % doc[VERSION])
        # np.frombuffer replaces the long-deprecated np.fromstring
        rtn[INDEX] = np.cumsum(np.frombuffer(decompress(doc[INDEX]), dtype='uint64'))
        doc_length = len(rtn[INDEX])
        column_set.update(doc[COLUMNS].keys())

        # get the mask for the columns we're about to load
        union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                mask = np.frombuffer(decompress(coldata[ROWMASK]), dtype='uint8')
                union_mask = union_mask | mask
            except KeyError:
                rtn[c] = None
        union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
        rtn_length = np.sum(union_mask)

        rtn[INDEX] = rtn[INDEX][union_mask]
        if include_symbol:
            rtn['SYMBOL'] = [doc[SYMBOL], ] * rtn_length

        # Unpack each requested column in turn
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                dtype = np.dtype(coldata[DTYPE])
                values = np.frombuffer(decompress(coldata[DATA]), dtype=dtype)
                self._set_or_promote_dtype(column_dtypes, c, dtype)
                rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
                rowmask = np.unpackbits(np.frombuffer(decompress(coldata[ROWMASK]),
                                                      dtype='uint8'))[:doc_length].astype('bool')
                rowmask = rowmask[union_mask]
                rtn[c][rowmask] = values
            except KeyError:
                rtn[c] = None

        if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
            rtn = self._prepend_image(rtn, doc[IMAGE_DOC], rtn_length, column_dtypes, column_set, columns)
        return rtn
Example #12
def test_performance_sequential(n, length):
    _str = random_string(length)
    _strarr = [_str for _ in range(n)]
    now = dt.now()
    [c.decompress(y) for y in [c.compressHC(x) for x in _strarr]]
    clz4_time = (dt.now() - now).total_seconds()
    now = dt.now()
    c.decompress_array(c.compressHC_array(_strarr))
    clz4_time_p = (dt.now() - now).total_seconds()
    now = dt.now()
    [lz4_decompress(y) for y in [lz4_compress(x) for x in _strarr]]
    lz4_time = (dt.now() - now).total_seconds()
    print()
    print("LZ4 Test %sx len:%s" % (n, length))
    print("    LZ4 HC %s s" % clz4_time)
    print("    LZ4 HC Parallel %s s" % clz4_time_p)
    print("    LZ4 %s s" % lz4_time)
Example #13
    def _segment_index(self, recarr, existing_index, start, new_segments):
        """
        Generate index of datetime64 -> item offset.

        Parameters:
        -----------
        recarr: new data being written (or appended)
        existing_index: index field from the versions document of the previous version
        start: first (0-based) offset of the new data
        new_segments: list of offsets. Each offset is the row index of the
                      last row of a particular chunk relative to the start of the _original_ item.
                      array(new_data) - segments = array(offsets in item)

        Returns:
        --------
        Binary(compress(array([(index, datetime)])))
            Where index is the 0-based index of the datetime in the DataFrame
        """
        # find the index of the first datetime64 column
        idx_col = self._datetime64_index(recarr)
        # if one exists let's create the index on it
        if idx_col is not None:
            new_segments = np.array(new_segments, dtype='i8')
            last_rows = recarr[new_segments - start]
            # create numpy index
            index = np.core.records.fromarrays(
                [last_rows[idx_col], new_segments], dtype=INDEX_DTYPE)
            # append to existing index if exists
            if existing_index:
                # existing_index_arr is read-only but it's never written to
                existing_index_arr = np.frombuffer(decompress(existing_index),
                                                   dtype=INDEX_DTYPE)
                if start > 0:
                    existing_index_arr = existing_index_arr[
                        existing_index_arr['index'] < start]
                index = np.concatenate((existing_index_arr, index))
            return Binary(compress(index.tobytes()))
        elif existing_index:
            raise ArcticException(
                "Could not find datetime64 index in item but existing data contains one"
            )
        return None
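The segment index itself is just a compressed structured array. A sketch of that round trip, using zlib as a stand-in for the library's lz4-based compress/decompress and an assumed INDEX_DTYPE layout of (last datetime in chunk, row offset):

import numpy as np
import zlib  # stand-in for the library's compress/decompress

INDEX_DTYPE = [('datetime', '<M8[ms]'), ('index', '<i8')]  # assumed layout

last_rows = np.array(['2014-01-02', '2014-01-03'], dtype='<M8[ms]')
offsets = np.array([99, 199], dtype='<i8')
index = np.core.records.fromarrays([last_rows, offsets], dtype=INDEX_DTYPE)

blob = zlib.compress(index.tobytes())
restored = np.frombuffer(zlib.decompress(blob), dtype=INDEX_DTYPE)
assert restored.tobytes() == index.tobytes()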
Example #14
    def _index_range(self, version, symbol, date_range=None, **kwargs):
        """ Given a version, read the segment_index and return the chunks associated
        with the date_range. As the segment index is (id -> last datetime)
        we need to take care in choosing the correct chunks. """
        if date_range and 'segment_index' in version:
            # index is read-only but it's never written to
            index = np.frombuffer(decompress(version['segment_index']),
                                  dtype=INDEX_DTYPE)
            dtcol = self._datetime64_index(index)
            if dtcol and len(index):
                dts = index[dtcol]
                start, end = _start_end(date_range, dts)
                if start > dts[-1]:
                    return -1, -1
                idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
                idxend = min(np.searchsorted(dts, end, side='right'),
                             len(dts) - 1)
                return int(index['index'][idxstart]), int(index['index'][idxend] + 1)
        return super(PandasStore, self)._index_range(version, symbol, **kwargs)
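Because each index entry stores the last datetime covered by its chunk, the two searchsorted calls bracket exactly the chunks that can overlap the requested range. A toy sketch with hand-picked epoch-milli values:

import numpy as np

dts = np.array([1000, 2000, 3000, 4000], dtype='uint64')  # last timestamp per chunk
start, end = 1500, 3500

idxstart = min(np.searchsorted(dts, start), len(dts) - 1)            # first chunk that can hold start
idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)  # last chunk that can hold end

assert (idxstart, idxend) == (1, 3)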
Example #15
def test_roundtrip_multi(n):
    _str = random_string(n)
    cstr = c.compress(_str)
    assert _str == c.decompress(cstr)
Example #16
def test_decompress():
    assert decompress(compress(b"foo")) == b"foo"
Example #17
def test_roundtrip(compress):
    _str = b"hello world"
    cstr = compress(_str)
    assert _str == c.decompress(cstr)
Example #18
def test_compress_empty_string():
    assert decompress(compress(b'')) == b''