def test_can_convert_to_records_mixed_object_column_string_nan(fast_serializable_check):
    """Check which frames are reported as convertible to records without objects.

    Each case pairs the column data for a DataFrame with the expected
    truthiness of ``can_convert_to_records_without_objects``. Purely numeric
    columns (with or without NaN) and purely string columns are expected to be
    accepted; columns mixing strings with numbers, ``None`` or NaN are
    expected to be rejected.

    The checks run inside ``FastCheckSerializable(fast_serializable_check)``,
    so the same expectations are verified for whatever value the fixture
    supplies.
    """
    # ``np.nan`` is used instead of the ``np.NaN`` alias, which was removed in
    # NumPy 2.0; both names refer to the same float value on older versions.
    cases = [
        ({'a': [1, 3, 4], 'b': [1.2, 8.0, 0.2]}, True),
        ({'a': [1, 3, 4], 'b': [1, 8.0, 2]}, True),
        ({'a': [1, 3, 4], 'b': [1.2, 8.0, np.nan]}, True),
        ({'a': ['abc', 'cde', 'def'], 'b': [1.2, 8.0, np.nan]}, True),
        ({'a': [u'abc', u'cde', 'def'], 'b': [1.2, 8.0, np.nan]}, True),
        ({'a': [u'abc', u'cde', 'def'], 'b': [1.2, '8.0', np.nan]}, False),
        # Do not serialize and force-stringify None
        ({'a': ['abc', None, 'def'], 'b': [1.2, 8.0, np.nan]}, False),
        # Do not serialize and force-stringify NaN among strings, rather pickle
        ({'a': ['abc', np.nan, 'def'], 'b': [1.2, 8.0, np.nan]}, False),
    ]

    with FastCheckSerializable(fast_serializable_check):
        serializer = anr.DataFrameSerializer()
        for data, expected in cases:
            df = pd.DataFrame(data)
            convertible = serializer.can_convert_to_records_without_objects(df, 'my_symbol')
            # Compare truthiness only, matching plain ``assert x`` / ``assert not x``.
            assert bool(convertible) == expected, \
                "unexpected result for frame built from {!r}".format(data)
import time
import timeit

import arctic.serialization.numpy_records as anr
from tests.unit.serialization.serialization_test_data import _mixed_test_data as input_test_data

df_serializer = anr.DataFrameSerializer()


def _bench(rounds, input_df, fast):
    """Time ``can_convert_to_records_without_objects`` and print the mean.

    Args:
        rounds: Number of times the check is invoked; also the divisor used
            for the reported per-iteration time, so it must be non-zero.
        input_df: Frame passed to the serializer on every iteration.
        fast: Truthy to enable the fast serializable check, falsy to disable
            it. The value is coerced to ``bool`` before use.

    Side effects:
        Calls ``anr.set_fast_check_df_serializable(fast)`` and does not restore
        the previous setting; prints the average time per iteration.
    """
    fast = bool(fast)
    anr.set_fast_check_df_serializable(fast)
    # ``timeit.default_timer`` selects the clock best suited for benchmarking
    # on the running interpreter, unlike the adjustable wall clock ``time.time``.
    start = timeit.default_timer()
    for _ in range(rounds):
        df_serializer.can_convert_to_records_without_objects(input_df, 'symA')
    elapsed = timeit.default_timer() - start
    print("Time per iteration (fast={}): {}".format(fast, elapsed / rounds))


# Results suggest significant speed improvements for
# (1) large df with objects
# Time per iteration (fast=False): 0.0281402397156
# Time per iteration (fast=True):  0.00866063833237
# (2) large multi-column df
# Time per iteration (fast=False): 0.00556221961975
# Time per iteration (fast=True):  0.00276621818542
# (3) large multi-index df
# Time per iteration (fast=False): 0.00640722036362
# Time per iteration (fast=True):  0.00154552936554
def assess_speed(df_kind):
    rounds = 100
    input_df = input_test_data()[df_kind][0]
    orig_config = anr._FAST_CHECK_DF_SERIALIZABLE