Ejemplo n.º 1
0
def test_can_convert_to_records_mixed_object_column_string_nan(fast_serializable_check):
    """Check which frames ``can_convert_to_records_without_objects`` accepts.

    The assertions below expect numeric columns (with or without NaN) and
    columns holding only strings to be accepted, and expect a column that
    mixes strings with numbers, ``None`` or NaN to be rejected.

    :param fast_serializable_check: value handed to ``FastCheckSerializable``,
        which wraps the whole test body as a context manager.
    """
    # NOTE: ``np.nan`` is used instead of the ``np.NaN`` alias; the alias was
    # removed in NumPy 2.0 while ``np.nan`` exists on every NumPy version.
    with FastCheckSerializable(fast_serializable_check):
        serializer = anr.DataFrameSerializer()

        # Integer column next to a float column.
        df = pd.DataFrame({'a': [1, 3, 4], 'b': [1.2, 8.0, 0.2]})
        assert serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # Column built from a mix of int and float literals.
        df = pd.DataFrame({'a': [1, 3, 4], 'b': [1, 8.0, 2]})
        assert serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # NaN inside an otherwise numeric column.
        df = pd.DataFrame({'a': [1, 3, 4], 'b': [1.2, 8.0, np.nan]})
        assert serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # String-only column next to a float column containing NaN.
        df = pd.DataFrame({'a': ['abc', 'cde', 'def'], 'b': [1.2, 8.0, np.nan]})
        assert serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # Same, with a mix of u'' and plain string literals.
        df = pd.DataFrame({'a': [u'abc', u'cde', 'def'], 'b': [1.2, 8.0, np.nan]})
        assert serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # A string hidden among floats must be rejected.
        df = pd.DataFrame({'a': [u'abc', u'cde', 'def'], 'b': [1.2, '8.0', np.nan]})
        assert not serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # Do not serialize and force-stringify None
        df = pd.DataFrame({'a': ['abc', None, 'def'], 'b': [1.2, 8.0, np.nan]})
        assert not serializer.can_convert_to_records_without_objects(df, 'my_symbol')

        # Do not serialize and force-stringify NaN among strings, rather pickle
        df = pd.DataFrame({'a': ['abc', np.nan, 'def'], 'b': [1.2, 8.0, np.nan]})
        assert not serializer.can_convert_to_records_without_objects(df, 'my_symbol')
Ejemplo n.º 2
0
import time
import timeit

import arctic.serialization.numpy_records as anr
from tests.unit.serialization.serialization_test_data import _mixed_test_data as input_test_data

# Module-level serializer instance; `_bench` calls its
# `can_convert_to_records_without_objects` method once per round.
df_serializer = anr.DataFrameSerializer()


def _bench(rounds, input_df, fast):
    """Time ``can_convert_to_records_without_objects`` and print the mean.

    The fast-check flag is set through ``anr.set_fast_check_df_serializable``
    before timing starts and is not restored by this function.

    :param rounds: number of times the check is invoked; must be non-zero
        because the elapsed time is divided by it.
    :param input_df: object passed to the serializer check on every round.
    :param fast: truthy to turn the fast check on, falsy to turn it off;
        coerced to ``bool`` before being used and printed.
    """
    fast = bool(fast)
    anr.set_fast_check_df_serializable(fast)
    # timeit.default_timer is the clock the timeit module itself measures
    # with; unlike time.time() on Python 3 it is not affected by system
    # clock adjustments while the loop is running.
    start = timeit.default_timer()
    for _ in range(rounds):
        df_serializer.can_convert_to_records_without_objects(input_df, 'symA')
    elapsed = timeit.default_timer() - start
    print("Time per iteration (fast={}): {}".format(fast, elapsed / rounds))


# Results suggest significant speed improvements for
#   (1) large df with objects
#       Time per iteration (fast=False): 0.0281402397156
#       Time per iteration (fast=True):  0.00866063833237
#   (2) large multi-column df
#       Time per iteration (fast=False): 0.00556221961975
#       Time per iteration (fast=True):  0.00276621818542
#   (3) large multi-index df
#       Time per iteration (fast=False): 0.00640722036362
#       Time per iteration (fast=True):  0.00154552936554
def assess_speed(df_kind):
    rounds = 100
    input_df = input_test_data()[df_kind][0]
    orig_config = anr._FAST_CHECK_DF_SERIALIZABLE