예제 #1
0
def build_row_desc(data, preserve_index=False):
    """Describe the columns of ``data`` as a list of ``TColumnType``.

    Parameters
    ----------
    data : pd.DataFrame or gpd.GeoDataFrame
        Frame whose columns are described.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first so the index
        is described alongside the regular columns.

    Returns
    -------
    list of TColumnType
        One descriptor per column, in column order.

    Raises
    ------
    TypeError
        If ``data`` is neither a ``pd.DataFrame`` nor a
        ``gpd.GeoDataFrame``.
    """
    if not isinstance(data, (pd.DataFrame, gpd.GeoDataFrame)):
        # pa.Table could be accepted here as well once
        # https://issues.apache.org/jira/browse/ARROW-1576 is complete
        message = ("Create table is not supported for type {}. "
                   "Use a pandas DataFrame, or perform the create "
                   "separately")
        raise TypeError(message.format(type(data)))

    frame = data.reset_index() if preserve_index else data

    # First pass: resolve the type name of every column and remember,
    # keyed by column name, whether it was reported as an array type.
    column_types = []
    array_flags = {}
    for column in frame.columns:
        raw_type = get_mapd_dtype(frame[column])
        if raw_type.startswith('ARRAY'):
            array_flags[column] = True
        else:
            # None (not False) leaves the is_array field unset.
            array_flags[column] = None
        column_types.append((column, raw_type.replace('ARRAY/', '')))

    # Second pass: wrap each (name, type name) pair in its descriptor.
    row_desc = []
    for column, type_name in column_types:
        type_info = TTypeInfo(
            getattr(TDatumType, type_name),
            is_array=array_flags[column],
        )
        row_desc.append(TColumnType(column, type_info))

    # String columns (type id 6) are forced to dictionary text encoding:
    # setting only encoding = 4 yields TEXT ENCODING DICT(32), see
    # https://github.com/omnisci/pymapd/issues/140#issuecomment-477353420
    for descriptor in row_desc:
        info = descriptor.col_type
        if info.type == 6:
            info.encoding = 4
        elif info.type in GEO_TYPE_ID:
            # NOTE(review): the meaning of precision 23 for geo types is
            # not visible from this function -- confirm before changing.
            info.precision = 23
    return row_desc
예제 #2
0
    def test_build_row_desc(self):
        """Check the descriptors ``build_row_desc`` produces for a frame.

        The frame has one column per supported scalar kind.  The call is
        made once with the default arguments and once with
        ``preserve_index=True`` after naming the index ``'idx'``.
        """
        column_order = [
            'boolean_', 'smallint_', 'int_', 'bigint_',
            'float_', 'double_', 'varchar_', 'text_',
            'time_', 'timestamp1_', 'timestamp2_', 'date_',
        ]
        column_values = {
            'boolean_': [True, False],
            'smallint_': np.array([0, 1], dtype=np.int16),
            'int_': np.array([0, 1], dtype=np.int32),
            'bigint_': np.array([0, 1], dtype=np.int64),
            'float_': np.array([0, 1], dtype=np.float32),
            'double_': np.array([0, 1], dtype=np.float64),
            'varchar_': ['a', 'b'],
            'text_': ['a', 'b'],
            'time_': [datetime.time(0, 11, 59), datetime.time(13)],
            'timestamp1_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
            # Nanosecond-resolution values; expected with precision=9.
            'timestamp2_': [
                np.datetime64('2016-01-01 01:01:01.001001001'),
                np.datetime64('2017-01-01 01:01:01.001001001'),
            ],
            'date_': [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        }
        data = pd.DataFrame(column_values, columns=column_order)

        expected = [
            TColumnType(col_name='boolean_',
                        col_type=TTypeInfo(type=10),
                        is_reserved_keyword=None),
            TColumnType(col_name='smallint_',
                        col_type=TTypeInfo(type=0),
                        is_reserved_keyword=None),
            TColumnType(col_name='int_',
                        col_type=TTypeInfo(type=1),
                        is_reserved_keyword=None),
            TColumnType(col_name='bigint_', col_type=TTypeInfo(type=2)),
            TColumnType(col_name='float_', col_type=TTypeInfo(type=3)),
            TColumnType(col_name='double_', col_type=TTypeInfo(type=5)),
            # Both string columns carry encoding=4.
            TColumnType(col_name='varchar_',
                        col_type=TTypeInfo(type=6, encoding=4)),
            TColumnType(col_name='text_',
                        col_type=TTypeInfo(type=6, encoding=4)),
            TColumnType(col_name='time_', col_type=TTypeInfo(type=7)),
            TColumnType(col_name='timestamp1_', col_type=TTypeInfo(type=8)),
            TColumnType(col_name='timestamp2_',
                        col_type=TTypeInfo(type=8, precision=9)),
            TColumnType(col_name='date_', col_type=TTypeInfo(type=9)),
        ]

        without_index = _pandas_loaders.build_row_desc(data)
        assert without_index == expected

        # With the index preserved, its descriptor comes first.
        data.index.name = 'idx'
        with_index = _pandas_loaders.build_row_desc(data, preserve_index=True)
        index_column = TColumnType(col_name='idx',
                                   col_type=TTypeInfo(type=2))
        assert with_index == [index_column] + expected
예제 #3
0
    def test_nulls_handled(self):
        """A single all-null columnar row is returned as a tuple of ``None``.

        Eleven nullable columns (type ids 0 through 10) each hold one
        value whose null flag is set; ``make_row_results_set`` is
        expected to yield exactly one row of eleven ``None`` entries.
        """
        placeholder = -2147483648

        def null_column(field):
            # One-value TColumn stored under ``field`` and flagged null.
            return TColumn(data=TColumnData(**{field: [placeholder]}),
                           nulls=[True])

        # Columns 'a'..'k' take type ids 0..10 in order.
        descriptors = [
            TColumnType(col_name=name,
                        col_type=TTypeInfo(type=type_id, nullable=True))
            for type_id, name in enumerate('abcdefghijk')
        ]

        # TColumnData field used by each column, in column order.
        fields = (['int_col'] * 3
                  + ['real_col'] * 3
                  + ['str_col']
                  + ['int_col'] * 4)
        columns = [null_column(field) for field in fields]

        rs = TQueryResult(
            TRowSet(
                row_desc=descriptors,
                rows=[],
                columns=columns,
                is_columnar=True,
            ))

        rows = list(make_row_results_set(rs))
        assert rows == [(None, ) * 11]
예제 #4
0
    def test_extract_row_details(self):
        """``_extract_column_details`` turns descriptors into ``ColumnDetails``.

        Input descriptors and expected results are generated from one
        table so that each column's numeric ids sit next to the names
        they are expected to be reported as.
        """
        # (column name, type id, encoding id, comp_param,
        #  expected type name, expected encoding name)
        specs = [
            ('date_', 6, 4, 32, 'STR', 'DICT'),
            ('trans', 6, 4, 32, 'STR', 'DICT'),
            ('symbol', 6, 4, 32, 'STR', 'DICT'),
            ('qty', 1, 0, 0, 'INT', 'NONE'),
            ('price', 3, 0, 0, 'FLOAT', 'NONE'),
            ('vol', 3, 0, 0, 'FLOAT', 'NONE'),
        ]

        data = []
        expected = []
        for name, type_id, encoding_id, comp, type_name, enc_name in specs:
            data.append(
                TColumnType(
                    col_name=name,
                    col_type=TTypeInfo(
                        type=type_id,
                        encoding=encoding_id,
                        nullable=True,
                        is_array=False,
                        precision=0,
                        scale=0,
                        comp_param=comp,
                    ),
                    is_reserved_keyword=False,
                    src_name='',
                ))
            expected.append(
                ColumnDetails(
                    name=name,
                    type=type_name,
                    nullable=True,
                    precision=0,
                    scale=0,
                    comp_param=comp,
                    encoding=enc_name,
                    is_array=False,
                ))

        result = _extract_column_details(data)
        assert result == expected