Exemple #1
0
def test_none_df():
    """Expect ``ArcticSerializationException`` when the input data is ``None``.

    For each of ``serialize`` and ``generator_bytes`` a fresh serializer is
    built with ``None`` as the data and the method is called. Both the
    construction and the call sit inside the ``pytest.raises`` block, so the
    exception may originate from either step.
    """
    for method_name in ('serialize', 'generator_bytes'):
        with pytest.raises(ArcticSerializationException):
            serializer = IncrementalPandasToRecArraySerializer(
                df_serializer, None, chunk_size=_CHUNK_SIZE)
            getattr(serializer, method_name)()
Exemple #2
0
def test_generator_bytes_range(from_idx, to_idx):
    """Check ``generator_bytes`` over a ``from_idx``/``to_idx`` range.

    The bytes yielded for the requested range are concatenated and compared
    with the bytes of the same slice of the expected array taken from the
    'large' test-data entry. The serializer's ``dtype`` must also equal the
    expected dtype.

    :param from_idx: start index, passed to ``generator_bytes`` and used to
                     slice the expected array.
    :param to_idx: end index, passed to ``generator_bytes`` and used to
                   slice the expected array.
    """
    # Tests also negative indexing
    # Look the 'large' entry up once instead of calling the factory twice.
    large_case = _mixed_test_data()['large']
    df = large_case[0]
    expectation = large_case[1]

    incr_ser = IncrementalPandasToRecArraySerializer(df_serializer,
                                                     df,
                                                     chunk_size=_CHUNK_SIZE)

    chunk_bytes = [
        chunk_b
        for chunk_b, _, _, _ in incr_ser.generator_bytes(from_idx=from_idx,
                                                         to_idx=to_idx)
    ]
    # ``tobytes()`` is the supported spelling; ``tostring()`` is a deprecated
    # alias that newer NumPy versions no longer provide.
    expected_bytes = expectation[0][from_idx:to_idx].tobytes()
    # Reduce to a bool before asserting -- presumably so a failure does not
    # make pytest render two large byte strings.
    matching = expected_bytes == b''.join(chunk_bytes)
    assert matching
    assert expectation[1] == incr_ser.dtype
Exemple #3
0
def test_serialize_pandas_to_recarray(input_df_descr):
    """Check ``serialize()`` against the expected array and dtype.

    Test-data entries reported as not serializable by
    ``is_test_data_serializable`` are silently skipped. When the expectation
    is an exception class, exhausting ``generator_bytes()`` must raise it;
    otherwise the serialized bytes and dtype must match the expectation.

    :param input_df_descr: key of the test-data entry to exercise.
    """
    if not is_test_data_serializable(input_df_descr):
        return

    # Look the entry up once instead of calling the factory twice.
    test_case = _mixed_test_data()[input_df_descr]
    df = test_case[0]
    expectation = test_case[1]

    incr_ser = IncrementalPandasToRecArraySerializer(df_serializer,
                                                     df,
                                                     chunk_size=_CHUNK_SIZE)
    # The isinstance guard runs first so that issubclass() is never handed a
    # tuple (which would itself raise TypeError).
    if not isinstance(expectation, tuple) and issubclass(
            expectation, Exception):
        with pytest.raises(expectation):
            # Exhaust the generator; the values themselves are not needed.
            [chunk for chunk, _, _, _ in incr_ser.generator_bytes()]
    else:
        incr_ser_data, incr_ser_dtype = incr_ser.serialize()
        # ``tobytes()`` is the supported spelling; ``tostring()`` is a
        # deprecated alias that newer NumPy versions no longer provide.
        matching = expectation[0].tobytes() == incr_ser_data.tobytes()
        assert matching
        assert expectation[1] == incr_ser_dtype
Exemple #4
0
def test_serialize_incremental_chunk_size_pandas_to_recarray(input_df_descr):
    """Check incremental serialization across several chunk sizes.

    Test-data entries reported as not serializable by
    ``is_test_data_serializable`` are silently skipped. When the expectation
    is an exception class, building the serializer and exhausting
    ``generator_bytes()`` must raise it for every chunk size. Otherwise the
    concatenated chunk bytes and the serializer dtype must match the
    expectation for every chunk size.

    :param input_df_descr: key of the test-data entry to exercise.
    """
    if not is_test_data_serializable(input_df_descr):
        return

    # Look the entry up once instead of calling the factory twice.
    test_case = _mixed_test_data()[input_df_descr]
    df = test_case[0]
    expectation = test_case[1]

    # The isinstance guard runs first so that issubclass() is never handed a
    # tuple (which would itself raise TypeError).
    if not isinstance(expectation, tuple) and issubclass(
            expectation, Exception):
        for div in (1, 4, 8):
            chunk_size = div * 8 * 1024**2
            with pytest.raises(expectation):
                incr_ser = IncrementalPandasToRecArraySerializer(
                    df_serializer, df, chunk_size=chunk_size)
                [chunk for chunk, _, _, _ in incr_ser.generator_bytes()]
        return

    for div in (1, 4, 8):
        chunk_size = div * 8 * 1024**2
        if input_df_descr is not None and len(expectation) > 0:
            # Derive the chunk size from the expected row width instead.
            row_size = int(expectation[0].dtype.itemsize)
            # NOTE(review): true division yields a float chunk_size on
            # Python 3; kept as is because the serializer's handling of
            # non-integer sizes is not visible from here -- confirm.
            chunk_size = NON_HOMOGENEOUS_DTYPE_PATCH_SIZE_ROWS * row_size / div
        incr_ser = IncrementalPandasToRecArraySerializer(df_serializer,
                                                         df,
                                                         chunk_size=chunk_size)
        chunk_bytes = [chunk for chunk, _, _, _ in incr_ser.generator_bytes()]
        # ``tobytes()`` is the supported spelling; ``tostring()`` is a
        # deprecated alias that newer NumPy versions no longer provide.
        matching = expectation[0].tobytes() == b''.join(chunk_bytes)
        assert matching
        assert expectation[1] == incr_ser.dtype
Exemple #5
0
def test_incremental_bad_init():
    """Expect ``ArcticSerializationException`` for invalid constructor input.

    Covered cases: a string as data, an integer as data, ``chunk_size=0``,
    ``chunk_size=-1`` and ``string_max_len=-1``.
    """
    # Each case is a zero-argument callable so that every argument
    # (including the test-data lookup) is evaluated inside ``pytest.raises``.
    bad_constructions = (
        lambda: IncrementalPandasToRecArraySerializer(
            df_serializer, 'hello world', chunk_size=_CHUNK_SIZE),
        lambda: IncrementalPandasToRecArraySerializer(
            df_serializer, 1234, chunk_size=_CHUNK_SIZE),
        lambda: IncrementalPandasToRecArraySerializer(
            df_serializer, _mixed_test_data()['small'][0], chunk_size=0),
        lambda: IncrementalPandasToRecArraySerializer(
            df_serializer, _mixed_test_data()['small'][0], chunk_size=-1),
        lambda: IncrementalPandasToRecArraySerializer(
            df_serializer, _mixed_test_data()['small'][0],
            chunk_size=_CHUNK_SIZE, string_max_len=-1),
    )
    for construct in bad_constructions:
        with pytest.raises(ArcticSerializationException):
            construct()
Exemple #6
0
def test_shape(input_df_descr):
    """Check the serializer's ``shape`` against the expected array's shape.

    Test-data entries reported as not serializable by
    ``is_test_data_serializable`` are silently skipped. When the expectation
    is an exception class, iterating over ``shape`` must raise it.

    :param input_df_descr: key of the test-data entry to exercise.
    """
    if not is_test_data_serializable(input_df_descr):
        return

    frame = _mixed_test_data()[input_df_descr][0]
    expected = _mixed_test_data()[input_df_descr][1]

    serializer = IncrementalPandasToRecArraySerializer(df_serializer,
                                                       frame,
                                                       chunk_size=_CHUNK_SIZE)

    # isinstance is checked first so issubclass() never receives a tuple.
    expects_error = (not isinstance(expected, tuple)
                     and issubclass(expected, Exception))
    if not expects_error:
        assert serializer.shape == expected[0].shape
        return

    with pytest.raises(expected):
        # NOTE(review): unpacking ``shape`` items as 4-tuples looks copied
        # from the generator_bytes tests; presumably only the attribute
        # access is meant to raise -- confirm before simplifying.
        for _chunk, _, _, _ in serializer.shape:
            pass