Exemple #1
0
    def test_build_row_desc(self):
        """Verify ``build_row_desc`` on a frame holding one column per dtype.

        The frame is described twice: first with default arguments, then
        with ``preserve_index=True`` after the index has been named ``idx``,
        in which case one extra leading ``idx`` descriptor is expected.
        """
        pd = pytest.importorskip("pandas")
        import numpy as np
        from mapd.ttypes import TTypeInfo, TColumnType

        def described(name, type_code, **column_kwargs):
            # One expected descriptor: column name plus its numeric type code.
            return TColumnType(col_name=name,
                               col_type=TTypeInfo(type=type_code),
                               **column_kwargs)

        # (column name, sample values), listed in the required column order.
        # NOTE(review): 'smallint_' is backed by an 8-bit dtype here while
        # type code 0 is expected -- confirm that int8 is the intended input.
        samples = [
            ('boolean_', [True, False]),
            ('smallint_', np.array([0, 1], dtype=np.int8)),
            ('int_', np.array([0, 1], dtype=np.int32)),
            ('bigint_', np.array([0, 1], dtype=np.int64)),
            ('float_', np.array([0, 1], dtype=np.float32)),
            ('double_', np.array([0, 1], dtype=np.float64)),
            ('varchar_', ['a', 'b']),
            ('text_', ['a', 'b']),
            ('time_', [datetime.time(0, 11, 59), datetime.time(13)]),
            ('timestamp_', [pd.Timestamp('2016'), pd.Timestamp('2017')]),
            ('date_', [datetime.date(2016, 1, 1),
                       datetime.date(2017, 1, 1)]),
        ]
        frame = pd.DataFrame(dict(samples),
                             columns=[name for name, _ in samples])

        described_columns = _pandas_loaders.build_row_desc(frame)
        without_index = [
            described('boolean_', 10, is_reserved_keyword=None),
            described('smallint_', 0, is_reserved_keyword=None),
            described('int_', 1, is_reserved_keyword=None),
            described('bigint_', 2),
            described('float_', 3),
            described('double_', 5),
            described('varchar_', 6),
            described('text_', 6),
            described('time_', 7),
            described('timestamp_', 8),
            described('date_', 9),
        ]

        assert described_columns == without_index

        # With a named index and preserve_index=True the index is expected
        # to show up as the first descriptor, ahead of the data columns.
        frame.index.name = 'idx'
        described_columns = _pandas_loaders.build_row_desc(
            frame, preserve_index=True)
        with_index = [described('idx', 2)] + without_index

        assert described_columns == with_index
Exemple #2
0
    def test_create_non_pandas_raises(self):
        """Passing a plain list of tuples must raise ``TypeError``.

        The exception text is additionally searched (as a regular
        expression) for the fragment 'is not supported for type '.
        """
        rows = [(1, 'a'), (2, 'b')]
        with pytest.raises(TypeError, match='is not supported for type '):
            _pandas_loaders.build_row_desc(rows)
Exemple #3
0
    def test_build_row_desc(self):
        """Verify ``build_row_desc`` on a frame holding one column per dtype.

        The frame is described twice: first with default arguments, then
        with ``preserve_index=True`` after the index has been named ``idx``,
        in which case one extra leading ``idx`` descriptor is expected.
        """

        def described(name, type_info, **column_kwargs):
            # One expected descriptor: column name plus its type info.
            return TColumnType(col_name=name, col_type=type_info,
                               **column_kwargs)

        # (column name, sample values), listed in the required column order.
        samples = [
            ('boolean_', [True, False]),
            ('smallint_', np.array([0, 1], dtype=np.int16)),
            ('int_', np.array([0, 1], dtype=np.int32)),
            ('bigint_', np.array([0, 1], dtype=np.int64)),
            ('float_', np.array([0, 1], dtype=np.float32)),
            ('double_', np.array([0, 1], dtype=np.float64)),
            ('varchar_', ['a', 'b']),
            ('text_', ['a', 'b']),
            ('time_', [datetime.time(0, 11, 59), datetime.time(13)]),
            ('timestamp1_', [pd.Timestamp('2016'), pd.Timestamp('2017')]),
            # Nanosecond-resolution values; a precision of 9 is expected
            # on the matching descriptor below.
            ('timestamp2_', [
                np.datetime64('2016-01-01 01:01:01.001001001'),
                np.datetime64('2017-01-01 01:01:01.001001001'),
            ]),
            ('date_', [datetime.date(2016, 1, 1),
                       datetime.date(2017, 1, 1)]),
        ]
        frame = pd.DataFrame(dict(samples),
                             columns=[name for name, _ in samples])

        described_columns = _pandas_loaders.build_row_desc(frame)
        without_index = [
            described('boolean_', TTypeInfo(type=10),
                      is_reserved_keyword=None),
            described('smallint_', TTypeInfo(type=0),
                      is_reserved_keyword=None),
            described('int_', TTypeInfo(type=1),
                      is_reserved_keyword=None),
            described('bigint_', TTypeInfo(type=2)),
            described('float_', TTypeInfo(type=3)),
            described('double_', TTypeInfo(type=5)),
            described('varchar_', TTypeInfo(type=6, encoding=4)),
            described('text_', TTypeInfo(type=6, encoding=4)),
            described('time_', TTypeInfo(type=7)),
            described('timestamp1_', TTypeInfo(type=8)),
            described('timestamp2_', TTypeInfo(type=8, precision=9)),
            described('date_', TTypeInfo(type=9)),
        ]

        assert described_columns == without_index

        # With a named index and preserve_index=True the index is expected
        # to show up as the first descriptor, ahead of the data columns.
        frame.index.name = 'idx'
        described_columns = _pandas_loaders.build_row_desc(
            frame, preserve_index=True)
        with_index = [described('idx', TTypeInfo(type=2))] + without_index

        assert described_columns == with_index