def test_build_table_columnar_pandas(self):
    """Convert a null-free frame holding one column per tested type.

    The frame has two rows and eleven columns (boolean, three integer
    widths, two float widths, two string columns, time, timestamp and
    date).  The first element returned by ``build_input_columnar`` is
    compared against hand-written ``TColumn`` values.
    """
    column_order = [
        'boolean_', 'smallint_', 'int_', 'bigint_', 'float_', 'double_',
        'varchar_', 'text_', 'time_', 'timestamp_', 'date_',
    ]
    frame = pd.DataFrame(
        {
            'boolean_': [True, False],
            'smallint_': np.array([0, 1], dtype=np.int16),
            'int_': np.array([0, 1], dtype=np.int32),
            'bigint_': np.array([0, 1], dtype=np.int64),
            'float_': np.array([0, 1], dtype=np.float32),
            'double_': np.array([0, 1], dtype=np.float64),
            'varchar_': ['a', 'b'],
            'text_': ['a', 'b'],
            'time_': [datetime.time(0, 11, 59), datetime.time(13)],
            'timestamp_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
            'date_': [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        },
        columns=column_order,
    )
    converted = _pandas_loaders.build_input_columnar(
        frame, preserve_index=False)

    no_nulls = [False, False]

    def column(**payload):
        # Every expected column shares the same all-False null mask.
        return TColumn(TColumnData(**payload), nulls=no_nulls)

    wanted = [
        column(int_col=[True, False]),
        column(int_col=np.array([0, 1], dtype=np.int16)),
        column(int_col=np.array([0, 1], dtype=np.int32)),
        column(int_col=np.array([0, 1], dtype=np.int64)),
        column(real_col=np.array([0, 1], dtype=np.float32)),
        column(real_col=np.array([0, 1], dtype=np.float64)),
        column(str_col=['a', 'b']),
        column(str_col=['a', 'b']),
        # 00:11:59 and 13:00:00 expressed as seconds since midnight.
        column(int_col=[719, 46800]),
        # 2016-01-01 and 2017-01-01 expressed as seconds since the epoch.
        column(int_col=[1451606400, 1483228800]),
        column(int_col=[1451606400, 1483228800]),
    ]
    assert_columnar_equal(converted[0], wanted)
def test_build_table_columnar(self):
    """Convert a small int/float frame and check both resulting columns."""
    from pymapd._pandas_loaders import build_input_columnar

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [1.1, 2.2, 3.3]})
    # Three rows, none of them null.
    no_nulls = [False] * 3
    converted = build_input_columnar(frame, preserve_index=False)

    int_column = TColumn(TColumnData(int_col=[1, 2, 3]), nulls=no_nulls)
    real_column = TColumn(
        TColumnData(real_col=[1.1, 2.2, 3.3]), nulls=no_nulls)
    wanted = [int_column, real_column]

    # Only the first element of the return value is compared.
    assert_columnar_equal(converted[0], wanted)
def test_build_table_columnar_nulls(self):
    """Convert a frame whose last row is null in every column.

    Each of the eight columns holds two real values followed by ``None``.
    The test checks that the null mask is ``[False, False, True]`` for
    every column and that the third value is the expected placeholder.
    """
    import pandas as pd
    import numpy as np

    data = pd.DataFrame(
        {
            "boolean_": [True, False, None],
            # Use the builtin ``object``: the ``np.object`` alias was
            # deprecated in NumPy 1.20 and removed in NumPy 1.24.
            "bigint_": np.array([0, 1, None], dtype=object),
            "double_": np.array([0, 1, None], dtype=np.float64),
            "varchar_": ["a", "b", None],
            "text_": ['a', 'b', None],
            "time_": [datetime.time(0, 11, 59), datetime.time(13), None],
            "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"), None],
            "date_": [
                datetime.date(2016, 1, 1),
                datetime.date(2017, 1, 1),
                None,
            ],
        },
        columns=[
            'boolean_', 'bigint_', 'double_', 'varchar_', 'text_',
            'time_', 'timestamp_', 'date_',
        ],
    )
    result = _pandas_loaders.build_input_columnar(data, preserve_index=False)

    nulls = [False, False, True]
    # Placeholder values expected in the null slot of each column.
    int_na = -2147483648
    bigint_na = -9223372036854775808
    # Presumably the int64 minimum scaled from nanoseconds to seconds;
    # confirm against the loader implementation.
    ns_na = -9223372037

    expected = [
        TColumn(TColumnData(int_col=[1, 0, int_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int64)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(
                real_col=np.array([0, 1, np.nan], dtype=np.float64)),
            nulls=nulls,
        ),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        # 00:11:59 and 13:00:00 expressed as seconds since midnight.
        TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, ns_na]),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, bigint_na]),
            nulls=nulls,
        ),
    ]
    # NOTE(review): this compares the whole return value, whereas other
    # tests in this file compare ``result[0]`` -- confirm which shape
    # ``build_input_columnar`` returns for this test.
    assert_columnar_equal(result, expected)
def test_build_table_columnar(self, data, col_properties):
    """Check columnar conversion of ``data`` against generated expectations.

    ``data`` and ``col_properties`` are supplied by the caller; the column
    types and the expected columns are both derived from
    ``col_properties`` via the ``get_col_types`` / ``get_expected`` helpers.
    """
    from pymapd._pandas_loaders import build_input_columnar

    converted = build_input_columnar(
        data,
        preserve_index=False,
        col_names=data.columns,
        col_types=get_col_types(col_properties),
    )
    wanted = get_expected(data, col_properties)

    # One expected column per frame column.
    assert data.shape[1] == len(wanted)
    # Only the first element of the return value is compared.
    assert_columnar_equal(converted[0], wanted)
def test_build_table_columnar_nulls(self):
    """Convert a frame whose last row is null in every column.

    Each of the nine columns holds two real values followed by ``None``.
    The test checks that the first element returned by
    ``build_input_columnar`` carries a ``[False, False, True]`` null mask
    per column and the expected placeholder in the null slot.
    """
    import pandas as pd
    import numpy as np

    data = pd.DataFrame(
        {
            "boolean_": [True, False, None],
            # Currently Pandas does not support storing None or NaN in
            # integer columns, so int cols with null need to be objects.
            # This means our type detection will be unreliable: if there
            # is no number outside the int32 bounds in a column with
            # nulls then we will be assuming int.
            #
            # The builtin ``object`` is used because the ``np.object``
            # alias was deprecated in NumPy 1.20 and removed in 1.24.
            "int_": np.array([0, 1, None], dtype=object),
            "bigint_": np.array(
                [0, 9223372036854775807, None], dtype=object),
            "double_": np.array([0, 1, None], dtype=np.float64),
            "varchar_": ["a", "b", None],
            "text_": ['a', 'b', None],
            "time_": [datetime.time(0, 11, 59), datetime.time(13), None],
            "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"), None],
            "date_": [
                datetime.date(1001, 1, 1),
                datetime.date(2017, 1, 1),
                None,
            ],
        },
        columns=[
            'boolean_', 'int_', 'bigint_', 'double_', 'varchar_',
            'text_', 'time_', 'timestamp_', 'date_',
        ],
    )
    result = _pandas_loaders.build_input_columnar(data, preserve_index=False)

    nulls = [False, False, True]
    # Placeholder values expected in the null slot of each column.
    bool_na = -128
    int_na = -2147483648
    bigint_na = -9223372036854775808
    # Presumably the int64 minimum scaled from nanoseconds to seconds;
    # confirm against the loader implementation.
    ns_na = -9223372037
    double_na = 0

    expected = [
        TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=np.array(
                [0, 9223372036854775807, bigint_na], dtype=np.int64)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(
                real_col=np.array([0, 1, double_na], dtype=np.float64)),
            nulls=nulls,
        ),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        # 00:11:59 and 13:00:00 expressed as seconds since midnight.
        TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, ns_na]),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=[-30578688000, 1483228800, bigint_na]),
            nulls=nulls,
        ),
    ]
    assert_columnar_equal(result[0], expected)
def test_build_table_columnar_nulls(self):
    """Convert a null-bearing frame using explicit column details.

    Each of the nine columns holds two real values followed by ``None``.
    Column names and ``ColumnDetails`` are passed to
    ``build_input_columnar`` explicitly, and the first element of its
    return value is compared against hand-written ``TColumn`` values with
    a ``[False, False, True]`` null mask.
    """
    common_col_params = dict(
        nullable=True,
        scale=0,
        comp_param=0,
        encoding='NONE',
        is_array=False,
    )
    # (column name, column type) in frame order; every column is declared
    # with precision 0.
    schema = [
        ('boolean_', 'BOOL'),
        ('int_', 'INT'),
        ('bigint_', 'BIGINT'),
        ('double_', 'DOUBLE'),
        ('varchar_', 'STR'),
        ('text_', 'STR'),
        ('time_', 'TIME'),
        ('timestamp_', 'TIMESTAMP'),
        ('date_', 'DATE'),
    ]
    col_types = [
        ColumnDetails(
            name=col_name, type=col_type, precision=0, **common_col_params)
        for col_name, col_type in schema
    ]

    data = pd.DataFrame({
        'boolean_': [True, False, None],
        # Currently Pandas does not support storing None or NaN in
        # integer columns, so int cols with null need to be objects.
        # This means our type detection will be unreliable: if there is
        # no number outside the int32 bounds in a column with nulls then
        # we will be assuming int.
        #
        # The builtin ``object`` is used because the ``np.object`` alias
        # was deprecated in NumPy 1.20 and removed in 1.24.
        'int_': np.array([0, 1, None], dtype=object),
        'bigint_': np.array([0, 9223372036854775807, None], dtype=object),
        'double_': np.array([0, 1, None], dtype=np.float64),
        'varchar_': ['a', 'b', None],
        'text_': ['a', 'b', None],
        'time_': [datetime.time(0, 11, 59), datetime.time(13), None],
        'timestamp_': [
            pd.Timestamp('2016'),
            pd.Timestamp('2017'),
            None,
        ],
        'date_': [
            datetime.date(1001, 1, 1),
            datetime.date(2017, 1, 1),
            None,
        ],
    })
    result = _pandas_loaders.build_input_columnar(
        data,
        preserve_index=False,
        col_names=data.columns,
        col_types=col_types,
    )

    nulls = [False, False, True]
    # Placeholder values expected in the null slot of each column.
    bool_na = -128
    int_na = -2147483648
    bigint_na = -9223372036854775808
    # Presumably the int64 minimum scaled from nanoseconds to seconds;
    # confirm against the loader implementation.
    ns_na = -9223372037
    double_na = 0

    expected = [
        TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=np.array(
                [0, 9223372036854775807, bigint_na], dtype=np.int64)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(
                real_col=np.array([0, 1, double_na], dtype=np.float64)),
            nulls=nulls,
        ),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        # 00:11:59 and 13:00:00 expressed as seconds since midnight.
        TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, ns_na]),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=[-30578688000, 1483228800, bigint_na]),
            nulls=nulls,
        ),
    ]
    assert_columnar_equal(result[0], expected)
def test_build_table_columnar_pandas(self):
    """Convert a null-free frame using explicit column details.

    The frame has two rows and eleven columns.  Column names and
    ``ColumnDetails`` are passed to ``build_input_columnar`` explicitly,
    and the first element of its return value is compared against
    hand-written ``TColumn`` values.
    """
    shared_details = dict(
        nullable=True,
        precision=0,
        scale=0,
        comp_param=0,
        encoding='NONE',
        is_array=False,
    )
    # (column name, column type) in frame order.
    schema = [
        ('boolean_', 'BOOL'),
        ('smallint_', 'SMALLINT'),
        ('int_', 'INT'),
        ('bigint_', 'BIGINT'),
        ('float_', 'FLOAT'),
        ('double_', 'DOUBLE'),
        ('varchar_', 'STR'),
        ('text_', 'STR'),
        ('time_', 'TIME'),
        ('timestamp_', 'TIMESTAMP'),
        ('date_', 'DATE'),
    ]
    col_types = [
        ColumnDetails(name=col_name, type=col_type, **shared_details)
        for col_name, col_type in schema
    ]

    frame = pd.DataFrame({
        'boolean_': [True, False],
        'smallint_': np.array([0, 1], dtype=np.int16),
        'int_': np.array([0, 1], dtype=np.int32),
        'bigint_': np.array([0, 1], dtype=np.int64),
        'float_': np.array([0, 1], dtype=np.float32),
        'double_': np.array([0, 1], dtype=np.float64),
        'varchar_': ['a', 'b'],
        'text_': ['a', 'b'],
        'time_': [datetime.time(0, 11, 59), datetime.time(13)],
        'timestamp_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
        'date_': [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
    })
    converted = _pandas_loaders.build_input_columnar(
        frame,
        preserve_index=False,
        col_names=frame.columns,
        col_types=col_types,
    )

    no_nulls = [False, False]

    def column(**payload):
        # Every expected column shares the same all-False null mask.
        return TColumn(TColumnData(**payload), nulls=no_nulls)

    wanted = [
        column(int_col=[True, False]),
        column(int_col=np.array([0, 1], dtype=np.int16)),
        column(int_col=np.array([0, 1], dtype=np.int32)),
        column(int_col=np.array([0, 1], dtype=np.int64)),
        column(real_col=np.array([0, 1], dtype=np.float32)),
        column(real_col=np.array([0, 1], dtype=np.float64)),
        column(str_col=['a', 'b']),
        column(str_col=['a', 'b']),
        # 00:11:59 and 13:00:00 expressed as seconds since midnight.
        column(int_col=[719, 46800]),
        # 2016-01-01 and 2017-01-01 expressed as seconds since the epoch.
        column(int_col=[1451606400, 1483228800]),
        column(int_col=[1451606400, 1483228800]),
    ]
    assert_columnar_equal(converted[0], wanted)