Ejemplo n.º 1
0
    def test_build_table_columnar_pandas(self):
        """Convert a null-free frame with one column per type to columnar form.

        A two-row DataFrame holding boolean, integer, floating point, string,
        time, timestamp and date columns is passed to
        ``build_input_columnar`` and the first element of its result is
        compared against hand-written ``TColumn`` values.
        """
        column_order = [
            'boolean_', 'smallint_', 'int_', 'bigint_', 'float_', 'double_',
            'varchar_', 'text_', 'time_', 'timestamp_', 'date_',
        ]
        raw_columns = {
            "boolean_": [True, False],
            "smallint_": np.array([0, 1], dtype=np.int16),
            "int_": np.array([0, 1], dtype=np.int32),
            "bigint_": np.array([0, 1], dtype=np.int64),
            "float_": np.array([0, 1], dtype=np.float32),
            "double_": np.array([0, 1], dtype=np.float64),
            "varchar_": ["a", "b"],
            "text_": ['a', 'b'],
            "time_": [datetime.time(0, 11, 59), datetime.time(13)],
            "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
            "date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        }
        # The explicit column list fixes the column order of the frame.
        frame = pd.DataFrame(raw_columns, columns=column_order)
        converted = _pandas_loaders.build_input_columnar(
            frame, preserve_index=False)

        # No row is null, so every column shares the same null mask.
        no_nulls = [False, False]

        def int_column(values):
            return TColumn(TColumnData(int_col=values), nulls=no_nulls)

        def real_column(values):
            return TColumn(TColumnData(real_col=values), nulls=no_nulls)

        def str_column(values):
            return TColumn(TColumnData(str_col=values), nulls=no_nulls)

        expected = [
            int_column([True, False]),
            int_column(np.array([0, 1], dtype=np.int16)),
            int_column(np.array([0, 1], dtype=np.int32)),
            int_column(np.array([0, 1], dtype=np.int64)),
            real_column(np.array([0, 1], dtype=np.float32)),
            real_column(np.array([0, 1], dtype=np.float64)),
            str_column(['a', 'b']),
            str_column(['a', 'b']),
            # Times are expected as seconds since midnight.
            int_column([719, 46800]),
            # Timestamps and dates are expected as epoch seconds.
            int_column([1451606400, 1483228800]),
            int_column([1451606400, 1483228800]),
        ]
        assert_columnar_equal(converted[0], expected)
Ejemplo n.º 2
0
    def test_build_table_columnar(self):
        """Convert a small int/float frame and compare with expected columns.

        The integer column ``a`` is expected as an ``int_col`` and the float
        column ``b`` as a ``real_col``, both with an all-False null mask.
        """
        from pymapd._pandas_loaders import build_input_columnar

        frame = pd.DataFrame({"a": [1, 2, 3], "b": [1.1, 2.2, 3.3]})
        converted = build_input_columnar(frame, preserve_index=False)

        no_nulls = [False, False, False]
        int_column = TColumn(TColumnData(int_col=[1, 2, 3]), nulls=no_nulls)
        real_column = TColumn(
            TColumnData(real_col=[1.1, 2.2, 3.3]), nulls=no_nulls)

        assert_columnar_equal(converted[0], [int_column, real_column])
Ejemplo n.º 3
0
    def test_build_table_columnar_nulls(self):
        """Convert a frame whose last row is null in every column.

        Each column carries two real values followed by ``None``. The
        expected output flags the third slot in ``nulls`` and stores the
        placeholder values defined below (or ``nan`` / an empty string) in
        the data arrays.
        """
        import pandas as pd
        import numpy as np

        data = pd.DataFrame(
            {
                "boolean_": [True, False, None],
                # The builtin ``object`` replaces the ``np.object`` alias,
                # which was deprecated in NumPy 1.20 and removed in 1.24.
                "bigint_": np.array([0, 1, None], dtype=object),
                "double_": np.array([0, 1, None], dtype=np.float64),
                "varchar_": ["a", "b", None],
                "text_": ['a', 'b', None],
                "time_": [datetime.time(0, 11, 59),
                          datetime.time(13), None],
                "timestamp_": [pd.Timestamp("2016"),
                               pd.Timestamp("2017"), None],
                "date_": [datetime.date(2016, 1, 1),
                          datetime.date(2017, 1, 1), None],
            },
            columns=[
                'boolean_', 'bigint_', 'double_', 'varchar_', 'text_', 'time_',
                'timestamp_', 'date_'
            ])
        result = _pandas_loaders.build_input_columnar(data,
                                                      preserve_index=False)

        nulls = [False, False, True]
        # Placeholder values expected in the null slot of integer-backed
        # columns.
        int_na = -2147483648
        bigint_na = -9223372036854775808
        ns_na = -9223372037

        expected = [
            TColumn(TColumnData(int_col=[1, 0, int_na]), nulls=nulls),
            TColumn(
                TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int64)),
                nulls=nulls),
            TColumn(
                TColumnData(
                    real_col=np.array([0, 1, np.nan], dtype=np.float64)),
                nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
            TColumn(TColumnData(int_col=[1451606400, 1483228800, ns_na]),
                    nulls=nulls),
            TColumn(TColumnData(int_col=[1451606400, 1483228800, bigint_na]),
                    nulls=nulls),
        ]
        assert_columnar_equal(result, expected)
Ejemplo n.º 4
0
    def test_build_table_columnar(self, data, col_properties):
        """Convert ``data`` with explicit column types and check the result.

        ``data`` and ``col_properties`` are supplied from outside this
        method (presumably by test parametrization or fixtures; confirm
        against the enclosing class). Column types are derived with
        ``get_col_types`` and the expected columns with ``get_expected``.
        """
        from pymapd._pandas_loaders import build_input_columnar

        column_types = get_col_types(col_properties)
        loader_options = dict(
            preserve_index=False,
            col_names=data.columns,
            col_types=column_types,
        )
        converted = build_input_columnar(data, **loader_options)

        wanted = get_expected(data, col_properties)

        # There must be exactly one expected column per DataFrame column.
        assert len(wanted) == data.shape[1]
        assert_columnar_equal(converted[0], wanted)
Ejemplo n.º 5
0
    def test_build_table_columnar_nulls(self):
        """Convert a frame whose last row is null in every column.

        Each column carries two real values followed by ``None``. The
        expected output flags the third slot in ``nulls`` and stores the
        per-type placeholder values defined below (or an empty string for
        text columns) in the data arrays.
        """
        import pandas as pd
        import numpy as np

        data = pd.DataFrame({
            "boolean_": [True, False, None],
            # Currently Pandas does not support storing None or NaN
            # in integer columns, so int cols with null
            # need to be objects. This means our type detection will be
            # unreliable since if there is no number outside the int32
            # bounds in a column with nulls then we will be assuming int
            #
            # The builtin ``object`` replaces the ``np.object`` alias, which
            # was deprecated in NumPy 1.20 and removed in 1.24.
            "int_": np.array([0, 1, None], dtype=object),
            "bigint_": np.array([0, 9223372036854775807, None],
                                dtype=object),
            "double_": np.array([0, 1, None], dtype=np.float64),
            "varchar_": ["a", "b", None],
            "text_": ['a', 'b', None],
            "time_": [datetime.time(0, 11, 59), datetime.time(13), None],
            "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"), None],
            "date_": [datetime.date(1001, 1, 1), datetime.date(2017, 1, 1),
                      None],
        }, columns=['boolean_', 'int_', 'bigint_',
                    'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
                    'date_'])
        result = _pandas_loaders.build_input_columnar(data,
                                                      preserve_index=False)

        nulls = [False, False, True]
        # Placeholder values expected in the null slot of each column kind.
        bool_na = -128
        int_na = -2147483648
        bigint_na = -9223372036854775808
        ns_na = -9223372037
        double_na = 0

        expected = [
            TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
            TColumn(
                TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)),
                nulls=nulls),
            TColumn(
                TColumnData(int_col=np.array(
                    [0, 9223372036854775807, bigint_na], dtype=np.int64)),
                nulls=nulls),
            TColumn(
                TColumnData(
                    real_col=np.array([0, 1, double_na], dtype=np.float64)),
                nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
            TColumn(TColumnData(int_col=[1451606400, 1483228800, ns_na]),
                    nulls=nulls),
            # The year-1001 date is expected as a negative epoch value.
            TColumn(TColumnData(int_col=[-30578688000, 1483228800, bigint_na]),
                    nulls=nulls),
        ]
        assert_columnar_equal(result[0], expected)
Ejemplo n.º 6
0
    def test_build_table_columnar_nulls(self):
        """Convert a null-bearing frame using explicit column types.

        Each column carries two real values followed by ``None``. The frame
        is converted with a ``ColumnDetails`` entry per column, and the
        expected output flags the third slot in ``nulls`` and stores the
        per-type placeholder values defined below (or an empty string for
        text columns) in the data arrays.
        """
        # Settings shared by every column description; only the name and
        # type differ between columns.
        common_col_params = dict(
            nullable=True,
            precision=0,
            scale=0,
            comp_param=0,
            encoding='NONE',
            is_array=False,
        )

        column_kinds = [
            ('boolean_', 'BOOL'),
            ('int_', 'INT'),
            ('bigint_', 'BIGINT'),
            ('double_', 'DOUBLE'),
            ('varchar_', 'STR'),
            ('text_', 'STR'),
            ('time_', 'TIME'),
            ('timestamp_', 'TIMESTAMP'),
            ('date_', 'DATE'),
        ]
        col_types = [
            ColumnDetails(name=col_name, type=col_kind, **common_col_params)
            for col_name, col_kind in column_kinds
        ]

        data = pd.DataFrame({
            'boolean_': [True, False, None],
            # Currently Pandas does not support storing None or NaN
            # in integer columns, so int cols with null
            # need to be objects. This means our type detection will be
            # unreliable since if there is no number outside the int32
            # bounds in a column with nulls then we will be assuming int
            #
            # The builtin ``object`` replaces the ``np.object`` alias, which
            # was deprecated in NumPy 1.20 and removed in 1.24.
            'int_': np.array([0, 1, None], dtype=object),
            'bigint_': np.array([0, 9223372036854775807, None], dtype=object),
            'double_': np.array([0, 1, None], dtype=np.float64),
            'varchar_': ['a', 'b', None],
            'text_': ['a', 'b', None],
            'time_': [datetime.time(0, 11, 59),
                      datetime.time(13), None],
            'timestamp_': [
                pd.Timestamp('2016'),
                pd.Timestamp('2017'),
                None,
            ],
            'date_': [
                datetime.date(1001, 1, 1),
                datetime.date(2017, 1, 1),
                None,
            ],
        })

        result = _pandas_loaders.build_input_columnar(
            data,
            preserve_index=False,
            col_names=data.columns,
            col_types=col_types,
        )

        nulls = [False, False, True]
        # Placeholder values expected in the null slot of each column kind.
        bool_na = -128
        int_na = -2147483648
        bigint_na = -9223372036854775808
        ns_na = -9223372037
        double_na = 0

        expected = [
            TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
            TColumn(
                TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)),
                nulls=nulls,
            ),
            TColumn(
                TColumnData(int_col=np.array(
                    [0, 9223372036854775807, bigint_na], dtype=np.int64)),
                nulls=nulls,
            ),
            TColumn(
                TColumnData(
                    real_col=np.array([0, 1, double_na], dtype=np.float64)),
                nulls=nulls,
            ),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(int_col=[719, 46800, bigint_na]), nulls=nulls),
            TColumn(
                TColumnData(int_col=[1451606400, 1483228800, ns_na]),
                nulls=nulls,
            ),
            # The year-1001 date is expected as a negative epoch value.
            TColumn(
                TColumnData(int_col=[-30578688000, 1483228800, bigint_na]),
                nulls=nulls,
            ),
        ]
        assert_columnar_equal(result[0], expected)
Ejemplo n.º 7
0
    def test_build_table_columnar_pandas(self):
        """Convert a null-free frame using explicit column types.

        A two-row DataFrame with one column per type is converted with a
        matching ``ColumnDetails`` entry per column, and the first element
        of the result is compared against hand-written ``TColumn`` values.
        """
        # Settings shared by every column description; only the name and
        # type differ between columns.
        shared_details = dict(
            nullable=True,
            precision=0,
            scale=0,
            comp_param=0,
            encoding='NONE',
            is_array=False,
        )
        name_and_kind = [
            ('boolean_', 'BOOL'),
            ('smallint_', 'SMALLINT'),
            ('int_', 'INT'),
            ('bigint_', 'BIGINT'),
            ('float_', 'FLOAT'),
            ('double_', 'DOUBLE'),
            ('varchar_', 'STR'),
            ('text_', 'STR'),
            ('time_', 'TIME'),
            ('timestamp_', 'TIMESTAMP'),
            ('date_', 'DATE'),
        ]
        column_details = [
            ColumnDetails(name=col_name, type=col_kind, **shared_details)
            for col_name, col_kind in name_and_kind
        ]

        frame = pd.DataFrame({
            'boolean_': [True, False],
            'smallint_': np.array([0, 1], dtype=np.int16),
            'int_': np.array([0, 1], dtype=np.int32),
            'bigint_': np.array([0, 1], dtype=np.int64),
            'float_': np.array([0, 1], dtype=np.float32),
            'double_': np.array([0, 1], dtype=np.float64),
            'varchar_': ['a', 'b'],
            'text_': ['a', 'b'],
            'time_': [datetime.time(0, 11, 59), datetime.time(13)],
            'timestamp_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
            'date_': [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        })
        converted = _pandas_loaders.build_input_columnar(
            frame,
            preserve_index=False,
            col_names=frame.columns,
            col_types=column_details,
        )

        # No row is null, so every column shares the same null mask.
        no_nulls = [False, False]

        def int_column(values):
            return TColumn(TColumnData(int_col=values), nulls=no_nulls)

        def real_column(values):
            return TColumn(TColumnData(real_col=values), nulls=no_nulls)

        def str_column(values):
            return TColumn(TColumnData(str_col=values), nulls=no_nulls)

        expected = [
            int_column([True, False]),
            int_column(np.array([0, 1], dtype=np.int16)),
            int_column(np.array([0, 1], dtype=np.int32)),
            int_column(np.array([0, 1], dtype=np.int64)),
            real_column(np.array([0, 1], dtype=np.float32)),
            real_column(np.array([0, 1], dtype=np.float64)),
            str_column(['a', 'b']),
            str_column(['a', 'b']),
            # Times are expected as seconds since midnight.
            int_column([719, 46800]),
            # Timestamps and dates are expected as epoch seconds.
            int_column([1451606400, 1483228800]),
            int_column([1451606400, 1483228800]),
        ]
        assert_columnar_equal(converted[0], expected)