コード例 #1
0
ファイル: _pandas_loaders.py プロジェクト: kchu-mapd/pymapd
def build_row_desc(data, preserve_index=False):
    """Describe the columns of ``data`` as a list of ``TColumnType``.

    Parameters
    ----------
    data : pd.DataFrame or gpd.GeoDataFrame
        Frame whose columns are mapped through ``get_mapd_dtype``.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first so the index
        is described along with the regular columns.

    Returns
    -------
    list of TColumnType
        One descriptor per column, in column order.

    Raises
    ------
    TypeError
        If ``data`` is neither a DataFrame nor a GeoDataFrame.
    """
    if not isinstance(data, (pd.DataFrame, gpd.GeoDataFrame)):
        # pa.Table could be accepted here as well once
        # https://issues.apache.org/jira/browse/ARROW-1576 is complete
        raise TypeError("Create table is not supported for type {}. "
                        "Use a pandas DataFrame, or perform the create "
                        "separately".format(type(data)))

    frame = data.reset_index() if preserve_index else data

    # First pass: resolve every column's type name before any thrift
    # object is built. An 'ARRAY/' prefix marks an array column and is
    # stripped from the stored type name.
    column_types = []
    array_flags = {}
    for column in frame.columns:
        mapd_dtype = get_mapd_dtype(frame[column])
        if mapd_dtype.startswith('ARRAY'):
            array_flags[column] = True
        else:
            array_flags[column] = None
        column_types.append((column, mapd_dtype.replace('ARRAY/', '')))

    # Second pass: row_desc :: List<TColumnType>
    row_desc = []
    for column, type_name in column_types:
        type_info = TTypeInfo(getattr(TDatumType, type_name),
                              is_array=array_flags[column])
        row_desc.append(TColumnType(column, type_info))

    # String columns (type 6) are forced to dict text encoding; setting
    # only encoding = 4 gives the default TEXT ENCODING DICT(32).
    # https://github.com/omnisci/pymapd/issues/140#issuecomment-477353420
    for column_type in row_desc:
        type_info = column_type.col_type
        if type_info.type == 6:
            type_info.encoding = 4
        elif type_info.type in GEO_TYPE_ID:
            # NOTE(review): the meaning of precision 23 for geo columns
            # is not documented here -- value carried over unchanged.
            type_info.precision = 23
    return row_desc
コード例 #2
0
def build_row_desc(data, preserve_index=False):
    """Describe the columns of a DataFrame as a list of ``TColumnType``.

    Parameters
    ----------
    data : pd.DataFrame
        Frame whose columns are mapped through ``get_mapd_dtype``.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first so the index
        is described along with the regular columns.

    Returns
    -------
    list of TColumnType
        One descriptor per column, in column order.

    Raises
    ------
    TypeError
        If ``data`` is not a pandas DataFrame.
    """
    if not isinstance(data, pd.DataFrame):
        # pa.Table could be accepted here as well once
        # https://issues.apache.org/jira/browse/ARROW-1576 is complete
        raise TypeError("Create table is not supported for type {}. "
                        "Use a pandas DataFrame, or perform the create "
                        "separately".format(type(data)))

    frame = data.reset_index() if preserve_index else data

    # Resolve every column's type name before building thrift objects.
    column_types = []
    for column in frame.columns:
        column_types.append((column, get_mapd_dtype(frame[column])))

    # row_desc :: List<TColumnType>
    row_desc = []
    for column, type_name in column_types:
        type_info = TTypeInfo(getattr(TDatumType, type_name))
        row_desc.append(TColumnType(column, type_info))

    # String columns (type 6) are forced to dict text encoding; setting
    # only encoding = 4 gives the default TEXT ENCODING DICT(32).
    # https://github.com/omnisci/pymapd/issues/140#issuecomment-477353420
    for column_type in row_desc:
        type_info = column_type.col_type
        if type_info.type == 6:
            type_info.encoding = 4

    return row_desc
コード例 #3
0
ファイル: test_loaders.py プロジェクト: vishalbelsare/pymapd
    def test_build_row_desc(self):
        """Check the descriptors ``build_row_desc`` produces per column.

        The frame holds boolean, integer, float, string, time, timestamp
        and date columns. The result is checked once without the index
        and once with ``preserve_index=True`` on an index named ``idx``.
        """
        column_order = [
            'boolean_', 'smallint_', 'int_', 'bigint_', 'float_', 'double_',
            'varchar_', 'text_', 'time_', 'timestamp1_', 'timestamp2_',
            'date_',
        ]
        values = {
            'boolean_': [True, False],
            'smallint_': np.array([0, 1], dtype=np.int16),
            'int_': np.array([0, 1], dtype=np.int32),
            'bigint_': np.array([0, 1], dtype=np.int64),
            'float_': np.array([0, 1], dtype=np.float32),
            'double_': np.array([0, 1], dtype=np.float64),
            'varchar_': ['a', 'b'],
            'text_': ['a', 'b'],
            'time_': [datetime.time(0, 11, 59), datetime.time(13)],
            'timestamp1_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
            'timestamp2_': [
                np.datetime64('2016-01-01 01:01:01.001001001'),
                np.datetime64('2017-01-01 01:01:01.001001001'),
            ],
            'date_': [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        }
        data = pd.DataFrame(values, columns=column_order)

        actual = _pandas_loaders.build_row_desc(data)
        # String columns are expected with encoding=4; the timestamp
        # built from nanosecond np.datetime64 values with precision=9.
        expected = [
            TColumnType(col_name='boolean_', col_type=TTypeInfo(type=10),
                        is_reserved_keyword=None),
            TColumnType(col_name='smallint_', col_type=TTypeInfo(type=0),
                        is_reserved_keyword=None),
            TColumnType(col_name='int_', col_type=TTypeInfo(type=1),
                        is_reserved_keyword=None),
            TColumnType(col_name='bigint_', col_type=TTypeInfo(type=2)),
            TColumnType(col_name='float_', col_type=TTypeInfo(type=3)),
            TColumnType(col_name='double_', col_type=TTypeInfo(type=5)),
            TColumnType(col_name='varchar_',
                        col_type=TTypeInfo(type=6, encoding=4)),
            TColumnType(col_name='text_',
                        col_type=TTypeInfo(type=6, encoding=4)),
            TColumnType(col_name='time_', col_type=TTypeInfo(type=7)),
            TColumnType(col_name='timestamp1_', col_type=TTypeInfo(type=8)),
            TColumnType(col_name='timestamp2_',
                        col_type=TTypeInfo(type=8, precision=9)),
            TColumnType(col_name='date_', col_type=TTypeInfo(type=9)),
        ]
        assert actual == expected

        # With the index preserved, the named index is expected as a
        # leading type=2 column ahead of the regular ones.
        data.index.name = 'idx'
        actual = _pandas_loaders.build_row_desc(data, preserve_index=True)
        index_column = TColumnType(col_name='idx', col_type=TTypeInfo(type=2))
        expected = [index_column] + expected

        assert actual == expected
コード例 #4
0
    def test_build_row_desc(self):
        """Check the descriptors ``build_row_desc`` produces per column.

        The frame holds boolean, integer, float, string, time, timestamp
        and date columns. The result is checked once without the index
        and once with ``preserve_index=True`` on an index named ``idx``.
        """
        column_order = [
            'boolean_',
            'smallint_',
            'int_',
            'bigint_',
            'float_',
            'double_',
            'varchar_',
            'text_',
            'time_',
            'timestamp_',
            'date_',
        ]
        values = {
            'boolean_': [True, False],
            'smallint_': np.array([0, 1], dtype=np.int16),
            'int_': np.array([0, 1], dtype=np.int32),
            'bigint_': np.array([0, 1], dtype=np.int64),
            'float_': np.array([0, 1], dtype=np.float32),
            'double_': np.array([0, 1], dtype=np.float64),
            'varchar_': ['a', 'b'],
            'text_': ['a', 'b'],
            'time_': [datetime.time(0, 11, 59), datetime.time(13)],
            'timestamp_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
            'date_': [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        }
        data = pd.DataFrame(values, columns=column_order)

        actual = _pandas_loaders.build_row_desc(data)
        # String columns are expected with encoding=4.
        expected = [
            TColumnType(
                col_name='boolean_',
                col_type=TTypeInfo(type=10),
                is_reserved_keyword=None,
            ),
            TColumnType(
                col_name='smallint_',
                col_type=TTypeInfo(type=0),
                is_reserved_keyword=None,
            ),
            TColumnType(
                col_name='int_',
                col_type=TTypeInfo(type=1),
                is_reserved_keyword=None,
            ),
            TColumnType(col_name='bigint_', col_type=TTypeInfo(type=2)),
            TColumnType(col_name='float_', col_type=TTypeInfo(type=3)),
            TColumnType(col_name='double_', col_type=TTypeInfo(type=5)),
            TColumnType(
                col_name='varchar_',
                col_type=TTypeInfo(type=6, encoding=4),
            ),
            TColumnType(
                col_name='text_',
                col_type=TTypeInfo(type=6, encoding=4),
            ),
            TColumnType(col_name='time_', col_type=TTypeInfo(type=7)),
            TColumnType(col_name='timestamp_', col_type=TTypeInfo(type=8)),
            TColumnType(col_name='date_', col_type=TTypeInfo(type=9)),
        ]
        assert actual == expected

        # With the index preserved, the named index is expected as a
        # leading type=2 column ahead of the regular ones.
        data.index.name = 'idx'
        actual = _pandas_loaders.build_row_desc(data, preserve_index=True)
        index_column = TColumnType(col_name='idx', col_type=TTypeInfo(type=2))
        expected = [index_column] + expected

        assert actual == expected
コード例 #5
0
    def test_nulls_handled(self):
        """Check that null-flagged columnar values come back as ``None``.

        Builds a columnar ``TQueryResult`` with eleven nullable columns
        (type ids 0 through 10), each holding one value flagged as null,
        and expects a single row made of eleven ``None`` entries.
        """
        placeholder = -2147483648

        # One nullable column per type id 0..10, named 'a'..'k'.
        row_desc = [
            TColumnType(col_name=name,
                        col_type=TTypeInfo(type=type_id, nullable=True))
            for type_id, name in enumerate('abcdefghijk')
        ]

        # TColumnData field that carries each column's value, in column
        # order: a-c int, d-f real, g str, h-k int.
        data_fields = (['int_col'] * 3 + ['real_col'] * 3 + ['str_col']
                       + ['int_col'] * 4)
        columns = [
            TColumn(data=TColumnData(**{field: [placeholder]}),
                    nulls=[True])
            for field in data_fields
        ]

        rs = TQueryResult(
            TRowSet(
                row_desc=row_desc,
                rows=[],
                columns=columns,
                is_columnar=True,
            ))

        result = list(make_row_results_set(rs))
        assert result == [(None, ) * 11]
コード例 #6
0
    def test_sql_validate(self, con):
        """Check ``sql_validate`` output for a freshly created table.

        Recreates the ``stocks`` table, validates ``select * from stocks``
        through the thrift client, and compares the returned column types
        (as returned) and column names (sorted) with the expected values.
        """
        from omnisci.common.ttypes import TTypeInfo

        cursor = con.cursor()
        cursor.execute('drop table if exists stocks;')
        cursor.execute(
            'create table stocks (date_ text, trans text, symbol text, '
            'qty int, price float, vol float);')

        query = "select * from stocks"
        validated = con._client.sql_validate(con._session, query)
        col_names = sorted(col.col_name for col in validated)
        col_types = [col.col_type for col in validated]

        expected_col_names = ['date_', 'price', 'qty', 'symbol', 'trans',
                              'vol']

        def type_info(type_id, encoding, comp_param):
            # Only these three fields differ between the expected entries.
            return TTypeInfo(
                type=type_id,
                encoding=encoding,
                nullable=True,
                is_array=False,
                precision=0,
                scale=0,
                comp_param=comp_param,
                size=-1,
            )

        # (type, encoding, comp_param) in table-definition order:
        # three text columns, qty int, then the two float columns.
        type_specs = [(6, 4, 32)] * 3 + [(1, 0, 0)] + [(3, 0, 0)] * 2
        expected_types = [type_info(*spec) for spec in type_specs]

        assert col_types == expected_types
        assert col_names == expected_col_names
コード例 #7
0
    def test_extract_row_details(self):
        """Check ``_extract_column_details`` on six thrift descriptors.

        Each ``TColumnType`` input is expected to come back as a
        ``ColumnDetails`` whose type and encoding are given by name
        instead of by numeric id.
        """
        # (column name, type id, encoding id, comp_param,
        #  expected type name, expected encoding name)
        specs = [
            ('date_', 6, 4, 32, 'STR', 'DICT'),
            ('trans', 6, 4, 32, 'STR', 'DICT'),
            ('symbol', 6, 4, 32, 'STR', 'DICT'),
            ('qty', 1, 0, 0, 'INT', 'NONE'),
            ('price', 3, 0, 0, 'FLOAT', 'NONE'),
            ('vol', 3, 0, 0, 'FLOAT', 'NONE'),
        ]

        data = [
            TColumnType(
                col_name=name,
                col_type=TTypeInfo(
                    type=type_id,
                    encoding=encoding_id,
                    nullable=True,
                    is_array=False,
                    precision=0,
                    scale=0,
                    comp_param=comp_param,
                ),
                is_reserved_keyword=False,
                src_name='',
            )
            for name, type_id, encoding_id, comp_param, _, _ in specs
        ]
        result = _extract_column_details(data)

        expected = [
            ColumnDetails(
                name=name,
                type=type_name,
                nullable=True,
                precision=0,
                scale=0,
                comp_param=comp_param,
                encoding=encoding_name,
                is_array=False,
            )
            for name, _, _, comp_param, type_name, encoding_name in specs
        ]
        assert result == expected