def test_build_table_columnar(self):
    """Check the columnar input built from an int column and a float column.

    The first element of the ``build_input_columnar`` result is compared
    against one ``int_col`` and one ``real_col`` ``TColumn``, both with an
    all-``False`` null mask.
    """
    from pymapd._pandas_loaders import build_input_columnar

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [1.1, 2.2, 3.3]})

    # One mask object shared by both expected columns: no row is null.
    no_nulls = [False, False, False]
    int_column = TColumn(TColumnData(int_col=[1, 2, 3]), nulls=no_nulls)
    real_column = TColumn(
        TColumnData(real_col=[1.1, 2.2, 3.3]), nulls=no_nulls
    )

    built = build_input_columnar(frame, preserve_index=False)

    assert_columnar_equal(built[0], [int_column, real_column])
def test_build_table_columnar_nulls(self):
    """Check the columnar input built from a frame whose last row is null.

    Every column holds two real values followed by ``None``.  The expected
    ``TColumn`` list carries a placeholder value in that third slot and a
    null mask of ``[False, False, True]``.

    NOTE(review): another ``test_build_table_columnar_nulls`` is visible in
    this source.  If both definitions live in the same class body, the later
    one replaces this one and this test never runs -- confirm.
    """
    import pandas as pd
    import numpy as np

    data = pd.DataFrame(
        {
            "boolean_": [True, False, None],
            # ``np.object`` was only an alias of the builtin ``object``; it
            # was deprecated in NumPy 1.20 and removed in 1.24, so the
            # builtin is used directly (identical dtype on older NumPy).
            "bigint_": np.array([0, 1, None], dtype=object),
            "double_": np.array([0, 1, None], dtype=np.float64),
            "varchar_": ["a", "b", None],
            "text_": ['a', 'b', None],
            "time_": [datetime.time(0, 11, 59), datetime.time(13), None],
            "timestamp_": [
                pd.Timestamp("2016"),
                pd.Timestamp("2017"),
                None,
            ],
            "date_": [
                datetime.date(2016, 1, 1),
                datetime.date(2017, 1, 1),
                None,
            ],
        },
        columns=[
            'boolean_', 'bigint_', 'double_', 'varchar_', 'text_',
            'time_', 'timestamp_', 'date_',
        ],
    )
    result = _pandas_loaders.build_input_columnar(data, preserve_index=False)

    # Only the last row is null in every column.
    nulls = [False, False, True]

    # Placeholder values expected in the null row of the integer columns.
    int_na = -2147483648
    bigint_na = -9223372036854775808
    ns_na = -9223372037

    expected = [
        TColumn(TColumnData(int_col=[1, 0, int_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int64)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(
                real_col=np.array([0, 1, np.nan], dtype=np.float64)
            ),
            nulls=nulls,
        ),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, ns_na]),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, bigint_na]),
            nulls=nulls,
        ),
    ]
    assert_columnar_equal(result, expected)
def test_build_table_columnar_nulls(self):
    """Check the columnar input built from a frame whose last row is null.

    Every column holds two real values followed by ``None``.  The first
    element of the ``build_input_columnar`` result is compared against a
    ``TColumn`` list that carries a placeholder value in the third slot and
    a null mask of ``[False, False, True]``.

    NOTE(review): an earlier ``test_build_table_columnar_nulls`` is visible
    in this source.  If both definitions live in the same class body, this
    one silently replaces the earlier one -- confirm.
    """
    import pandas as pd
    import numpy as np

    data = pd.DataFrame(
        {
            "boolean_": [True, False, None],
            # Currently Pandas does not support storing None or NaN
            # in integer columns, so int cols with null
            # need to be objects. This means our type detection will be
            # unreliable since if there is no number outside the int32
            # bounds in a column with nulls then we will be assuming int.
            #
            # ``np.object`` was only an alias of the builtin ``object``; it
            # was deprecated in NumPy 1.20 and removed in 1.24, so the
            # builtin is used directly (identical dtype on older NumPy).
            "int_": np.array([0, 1, None], dtype=object),
            "bigint_": np.array(
                [0, 9223372036854775807, None], dtype=object
            ),
            "double_": np.array([0, 1, None], dtype=np.float64),
            "varchar_": ["a", "b", None],
            "text_": ['a', 'b', None],
            "time_": [datetime.time(0, 11, 59), datetime.time(13), None],
            "timestamp_": [
                pd.Timestamp("2016"),
                pd.Timestamp("2017"),
                None,
            ],
            "date_": [
                datetime.date(1001, 1, 1),
                datetime.date(2017, 1, 1),
                None,
            ],
        },
        columns=[
            'boolean_', 'int_', 'bigint_', 'double_', 'varchar_',
            'text_', 'time_', 'timestamp_', 'date_',
        ],
    )
    result = _pandas_loaders.build_input_columnar(data, preserve_index=False)

    # Only the last row is null in every column.
    nulls = [False, False, True]

    # Placeholder values expected in the null row of each column kind.
    bool_na = -128
    int_na = -2147483648
    bigint_na = -9223372036854775808
    ns_na = -9223372037
    double_na = 0

    expected = [
        TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(
                int_col=np.array(
                    [0, 9223372036854775807, bigint_na], dtype=np.int64
                )
            ),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(
                real_col=np.array([0, 1, double_na], dtype=np.float64)
            ),
            nulls=nulls,
        ),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
        TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
        TColumn(
            TColumnData(int_col=[1451606400, 1483228800, ns_na]),
            nulls=nulls,
        ),
        TColumn(
            TColumnData(int_col=[-30578688000, 1483228800, bigint_na]),
            nulls=nulls,
        ),
    ]
    assert_columnar_equal(result[0], expected)
def test_build_table_columnar_pandas(self):
    """Check the columnar input built from a null-free, mixed-type frame.

    The frame has two rows and covers boolean, int8, int32, int64, float32,
    float64, string, ``datetime.time``, ``pd.Timestamp`` and
    ``datetime.date`` columns.  The ``build_input_columnar`` result is
    compared against one ``TColumn`` per column, each with an all-``False``
    null mask.
    """
    import pandas as pd
    import numpy as np

    column_order = [
        'boolean_', 'smallint_', 'int_', 'bigint_', 'float_', 'double_',
        'varchar_', 'text_', 'time_', 'timestamp_', 'date_',
    ]
    values_by_column = {
        "boolean_": [True, False],
        "smallint_": np.array([0, 1], dtype=np.int8),
        "int_": np.array([0, 1], dtype=np.int32),
        "bigint_": np.array([0, 1], dtype=np.int64),
        "float_": np.array([0, 1], dtype=np.float32),
        "double_": np.array([0, 1], dtype=np.float64),
        "varchar_": ["a", "b"],
        "text_": ['a', 'b'],
        "time_": [datetime.time(0, 11, 59), datetime.time(13)],
        "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
        "date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
    }
    frame = pd.DataFrame(values_by_column, columns=column_order)
    built = _pandas_loaders.build_input_columnar(frame, preserve_index=False)

    # One mask object shared by every expected column: no row is null.
    no_nulls = [False, False]

    def int_column(values):
        # Expected column whose payload is carried in ``int_col``.
        return TColumn(TColumnData(int_col=values), nulls=no_nulls)

    def real_column(values):
        # Expected column whose payload is carried in ``real_col``.
        return TColumn(TColumnData(real_col=values), nulls=no_nulls)

    def str_column(values):
        # Expected column whose payload is carried in ``str_col``.
        return TColumn(TColumnData(str_col=values), nulls=no_nulls)

    wanted = [
        int_column([True, False]),
        int_column(np.array([0, 1], dtype=np.int8)),
        int_column(np.array([0, 1], dtype=np.int32)),
        int_column(np.array([0, 1], dtype=np.int64)),
        real_column(np.array([0, 1], dtype=np.float32)),
        real_column(np.array([0, 1], dtype=np.float64)),
        str_column(['a', 'b']),
        str_column(['a', 'b']),
        int_column([719, 46800]),
        int_column([1451606400, 1483228800]),
        int_column([1451606400, 1483228800]),
    ]
    assert_columnar_equal(built, wanted)