def get_many_types():
    # returning them from a function is required because of pa.dictionary
    # type holds a pyarrow array and test_array.py::test_toal_bytes_allocated
    # checks that the default memory pool has zero allocated bytes
    return (pa.null(), pa.bool_(), pa.int32(), pa.time32('s'), pa.time64('us'),
            pa.date32(), pa.timestamp('us'), pa.timestamp('us', tz='UTC'),
            pa.timestamp('us', tz='Europe/Paris'), pa.float16(), pa.float32(),
            pa.float64(), pa.decimal128(19, 4), pa.string(), pa.binary(),
            pa.binary(10), pa.list_(pa.int32()),
            pa.struct([
                pa.field('a', pa.int32()),
                pa.field('b', pa.int8()),
                pa.field('c', pa.string())
            ]),
            pa.struct([
                pa.field('a', pa.int32(), nullable=False),
                pa.field('b', pa.int8(), nullable=False),
                pa.field('c', pa.string())
            ]),
            pa.union(
                [pa.field('a', pa.binary(10)),
                 pa.field('b', pa.string())],
                mode=pa.lib.UnionMode_DENSE),
            pa.union(
                [pa.field('a', pa.binary(10)),
                 pa.field('b', pa.string())],
                mode=pa.lib.UnionMode_SPARSE),
            pa.union([
                pa.field('a', pa.binary(10), nullable=False),
                pa.field('b', pa.string())
            ],
                     mode=pa.lib.UnionMode_SPARSE),
            pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])))
Esempio n. 2
0
def test_cast_from_null():
    in_data = [None] * 3
    in_type = pa.null()
    out_types = [
        pa.null(),
        pa.uint8(),
        pa.float16(),
        pa.utf8(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int16()),
        pa.decimal128(19, 4),
        pa.timestamp('us'),
        pa.timestamp('us', tz='UTC'),
        pa.timestamp('us', tz='Europe/Paris'),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.list_(pa.int8())),
                   pa.field('c', pa.string())]),
        ]
    for out_type in out_types:
        _check_cast_case((in_data, in_type, in_data, out_type))

    out_types = [
        pa.dictionary(pa.int32(), pa.string()),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
        ]
    in_arr = pa.array(in_data, type=pa.null())
    for out_type in out_types:
        with pytest.raises(NotImplementedError):
            in_arr.cast(out_type)
Esempio n. 3
0
def test_cast_from_null():
    in_data = [None] * 3
    in_type = pa.null()
    out_types = [
        pa.null(),
        pa.uint8(),
        pa.float16(),
        pa.utf8(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int16()),
        pa.large_list(pa.uint8()),
        pa.decimal128(19, 4),
        pa.timestamp('us'),
        pa.timestamp('us', tz='UTC'),
        pa.timestamp('us', tz='Europe/Paris'),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.list_(pa.int8())),
                   pa.field('c', pa.string())]),
        ]
    for out_type in out_types:
        _check_cast_case((in_data, in_type, in_data, out_type))

    out_types = [
        pa.dictionary(pa.int32(), pa.string()),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
        ]
    in_arr = pa.array(in_data, type=pa.null())
    for out_type in out_types:
        with pytest.raises(NotImplementedError):
            in_arr.cast(out_type)
def test_union_type():
    def check_fields(ty, fields):
        assert ty.num_fields == len(fields)
        assert [ty[i] for i in range(ty.num_fields)] == fields

    fields = [pa.field('x', pa.list_(pa.int32())),
              pa.field('y', pa.binary())]
    type_codes = [5, 9]

    sparse_factories = [
        partial(pa.union, mode='sparse'),
        partial(pa.union, mode=pa.lib.UnionMode_SPARSE),
        pa.sparse_union,
    ]

    dense_factories = [
        partial(pa.union, mode='dense'),
        partial(pa.union, mode=pa.lib.UnionMode_DENSE),
        pa.dense_union,
    ]

    for factory in sparse_factories:
        ty = factory(fields)
        assert isinstance(ty, pa.SparseUnionType)
        assert ty.mode == 'sparse'
        check_fields(ty, fields)
        assert ty.type_codes == [0, 1]
        ty = factory(fields, type_codes=type_codes)
        assert ty.mode == 'sparse'
        check_fields(ty, fields)
        assert ty.type_codes == type_codes
        # Invalid number of type codes
        with pytest.raises(ValueError):
            factory(fields, type_codes=type_codes[1:])

    for factory in dense_factories:
        ty = factory(fields)
        assert isinstance(ty, pa.DenseUnionType)
        assert ty.mode == 'dense'
        check_fields(ty, fields)
        assert ty.type_codes == [0, 1]
        ty = factory(fields, type_codes=type_codes)
        assert ty.mode == 'dense'
        check_fields(ty, fields)
        assert ty.type_codes == type_codes
        # Invalid number of type codes
        with pytest.raises(ValueError):
            factory(fields, type_codes=type_codes[1:])

    for mode in ('unknown', 2):
        with pytest.raises(ValueError, match='Invalid union mode'):
            pa.union(fields, mode=mode)
Esempio n. 5
0
def test_type_schema_pickling():
    cases = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.map_(pa.string(), pa.int8()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_SPARSE),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_DENSE),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal128(12, 2),
        pa.decimal256(76, 38),
        pa.field('a', 'string', metadata={b'foo': b'bar'}),
        pa.list_(pa.field("element", pa.int64())),
        pa.large_list(pa.field("element", pa.int64())),
        pa.map_(pa.field("key", pa.string(), nullable=False),
                pa.field("value", pa.int8()))
    ]

    for val in cases:
        roundtripped = pickle.loads(pickle.dumps(val))
        assert val == roundtripped

    fields = []
    for i, f in enumerate(cases):
        if isinstance(f, pa.Field):
            fields.append(f)
        else:
            fields.append(pa.field('_f{}'.format(i), f))

    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    roundtripped = pickle.loads(pickle.dumps(schema))
    assert schema == roundtripped
def test_is_union():
    for mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]:
        assert types.is_union(pa.union([pa.field('a', pa.int32()),
                                        pa.field('b', pa.int8()),
                                        pa.field('c', pa.string())],
                                       mode=mode))
    assert not types.is_union(pa.list_(pa.int32()))
Esempio n. 7
0
def test_is_union():
    for mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]:
        assert types.is_union(pa.union([pa.field('a', pa.int32()),
                                        pa.field('b', pa.int8()),
                                        pa.field('c', pa.string())],
                                       mode=mode))
    assert not types.is_union(pa.list_(pa.int32()))
Esempio n. 8
0
def arrow_type(field):
    if field.name == "decimal":
        return pa.decimal128(field.precision)
    elif field.name == "uniontype":
        return pa.union(field.cont_types)
    elif field.name == "array":
        return pa.list_(field.type)
    elif field.name == "map":
        return pa.map_(field.key, field.value)
    elif field.name == "struct":
        return pa.struct(field.fields)
    else:
        types = {
            "boolean": pa.bool_(),
            "tinyint": pa.int8(),
            "smallint": pa.int16(),
            "int": pa.int32(),
            "bigint": pa.int64(),
            "float": pa.float32(),
            "double": pa.float64(),
            "string": pa.string(),
            "char": pa.string(),
            "varchar": pa.string(),
            "binary": pa.binary(),
            "timestamp": pa.timestamp("ms"),
            "date": pa.date32(),
        }
        if field.name not in types:
            raise ValueError("Cannot convert to arrow type: " + field.name)
        return types[field.name]
Esempio n. 9
0
def arrow_type(field):
    if field.name == 'decimal':
        return pa.decimal128(field.precision)
    elif field.name == 'uniontype':
        return pa.union(field.cont_types)
    elif field.name == 'array':
        return pa.list_(field.type)
    elif field.name == 'map':
        return pa.map_(field.key, field.value)
    elif field.name == 'struct':
        return pa.struct(field.fields)
    else:
        types = {
            'boolean': pa.bool_(),
            'tinyint': pa.int8(),
            'smallint': pa.int16(),
            'int': pa.int32(),
            'bigint': pa.int64(),
            'float': pa.float32(),
            'double': pa.float64(),
            'string': pa.string(),
            'char': pa.string(),
            'varchar': pa.string(),
            'binary': pa.binary(),
            'timestamp': pa.timestamp('ms'),
            'date': pa.date32(),
        }
        if field.name not in types:
            raise ValueError('Cannot convert to arrow type: ' + field.name)
        return types[field.name]
Esempio n. 10
0
def test_is_union():
    assert types.is_union(
        pa.union([
            pa.field('a', pa.int32()),
            pa.field('b', pa.int8()),
            pa.field('c', pa.string())
        ], pa.lib.UnionMode_SPARSE))
    assert not types.is_union(pa.list_(pa.int32()))
def test_union_type():
    def check_fields(ty, fields):
        assert ty.num_children == len(fields)
        assert [ty[i] for i in range(ty.num_children)] == fields

    fields = [pa.field('x', pa.list_(pa.int32())), pa.field('y', pa.binary())]
    for mode in ('sparse', pa.lib.UnionMode_SPARSE):
        ty = pa.union(fields, mode=mode)
        assert ty.mode == 'sparse'
        check_fields(ty, fields)
    for mode in ('dense', pa.lib.UnionMode_DENSE):
        ty = pa.union(fields, mode=mode)
        assert ty.mode == 'dense'
        check_fields(ty, fields)
    for mode in ('unknown', 2):
        with pytest.raises(ValueError, match='Invalid union mode'):
            pa.union(fields, mode=mode)
Esempio n. 12
0
def test_union_type():
    def check_fields(ty, fields):
        assert ty.num_children == len(fields)
        assert [ty[i] for i in range(ty.num_children)] == fields

    fields = [pa.field('x', pa.list_(pa.int32())),
              pa.field('y', pa.binary())]
    for mode in ('sparse', pa.lib.UnionMode_SPARSE):
        ty = pa.union(fields, mode=mode)
        assert ty.mode == 'sparse'
        check_fields(ty, fields)
    for mode in ('dense', pa.lib.UnionMode_DENSE):
        ty = pa.union(fields, mode=mode)
        assert ty.mode == 'dense'
        check_fields(ty, fields)
    for mode in ('unknown', 2):
        with pytest.raises(ValueError, match='Invalid union mode'):
            pa.union(fields, mode=mode)
Esempio n. 13
0
def test_type_schema_pickling():
    cases = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_SPARSE),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_DENSE),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal128(12, 2),
        pa.field('a', 'string', metadata={b'foo': b'bar'})
    ]

    for val in cases:
        roundtripped = pickle.loads(pickle.dumps(val))
        assert val == roundtripped

    fields = []
    for i, f in enumerate(cases):
        if isinstance(f, pa.Field):
            fields.append(f)
        else:
            fields.append(pa.field('_f{}'.format(i), f))

    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    roundtripped = pickle.loads(pickle.dumps(schema))
    assert schema == roundtripped
Esempio n. 14
0
def test_union_type():
    def check_fields(ty, fields):
        assert ty.num_fields == len(fields)
        assert [ty[i] for i in range(ty.num_fields)] == fields

    fields = [pa.field('x', pa.list_(pa.int32())),
              pa.field('y', pa.binary())]
    type_codes = [5, 9]

    for mode in ('sparse', pa.lib.UnionMode_SPARSE):
        ty = pa.union(fields, mode=mode)
        assert ty.mode == 'sparse'
        check_fields(ty, fields)
        assert ty.type_codes == [0, 1]
        ty = pa.union(fields, mode=mode, type_codes=type_codes)
        assert ty.mode == 'sparse'
        check_fields(ty, fields)
        assert ty.type_codes == type_codes
        # Invalid number of type codes
        with pytest.raises(ValueError):
            pa.union(fields, mode=mode, type_codes=type_codes[1:])

    for mode in ('dense', pa.lib.UnionMode_DENSE):
        ty = pa.union(fields, mode=mode)
        assert ty.mode == 'dense'
        check_fields(ty, fields)
        assert ty.type_codes == [0, 1]
        ty = pa.union(fields, mode=mode, type_codes=type_codes)
        assert ty.mode == 'dense'
        check_fields(ty, fields)
        assert ty.type_codes == type_codes
        # Invalid number of type codes
        with pytest.raises(ValueError):
            pa.union(fields, mode=mode, type_codes=type_codes[1:])

    for mode in ('unknown', 2):
        with pytest.raises(ValueError, match='Invalid union mode'):
            pa.union(fields, mode=mode)
Esempio n. 15
0
def get_many_types():
    # returning them from a function is required because of pa.dictionary
    # type holds a pyarrow array and test_array.py::test_toal_bytes_allocated
    # checks that the default memory pool has zero allocated bytes
    return (
        pa.null(),
        pa.bool_(),
        pa.int32(),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.timestamp('us'),
        pa.timestamp('us', tz='UTC'),
        pa.timestamp('us', tz='Europe/Paris'),
        pa.float16(),
        pa.float32(),
        pa.float64(),
        pa.decimal128(19, 4),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int32()),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.int8()),
                   pa.field('c', pa.string())]),
        pa.struct([pa.field('a', pa.int32(), nullable=False),
                   pa.field('b', pa.int8(), nullable=False),
                   pa.field('c', pa.string())]),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
        pa.union([pa.field('a', pa.binary(10), nullable=False),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
        pa.dictionary(pa.int32(), pa.string())
    )
Esempio n. 16
0
def test_is_union():
    assert types.is_union(pa.union([pa.field('a', pa.int32()),
                                    pa.field('b', pa.int8()),
                                    pa.field('c', pa.string())],
                                   pa.lib.UnionMode_SPARSE))
    assert not types.is_union(pa.list_(pa.int32()))
Esempio n. 17
0
    pa.date32(),
    pa.timestamp('us'),
    pa.timestamp('us', tz='UTC'),
    pa.timestamp('us', tz='Europe/Paris'),
    pa.float16(),
    pa.float32(),
    pa.float64(),
    pa.decimal128(19, 4),
    pa.string(),
    pa.binary(),
    pa.binary(10),
    pa.list_(pa.int32()),
    pa.struct([pa.field('a', pa.int32()),
               pa.field('b', pa.int8()),
               pa.field('c', pa.string())]),
    pa.union([pa.field('a', pa.binary(10)),
              pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
    pa.union([pa.field('a', pa.binary(10)),
              pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
    # XXX Needs array pickling
    # pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])),
]


def test_is_boolean():
    assert types.is_boolean(pa.bool_())
    assert not types.is_boolean(pa.int8())


def test_is_integer():
    signed_ints = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
    unsigned_ints = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
Esempio n. 18
0
    pa.struct(
        [
            pa.field("a", pa.int32(), nullable=False),
            pa.field("b", pa.int8(), nullable=False),
            pa.field("c", pa.string()),
        ]
    ),
    pa.dictionary(pa.int8(), pa.string()),
]

_unsupported_pyarrow_types = [
    pa.decimal256(76, 38),
    pa.duration("s"),
    pa.map_(pa.string(), pa.int32()),
    pa.union(
        [pa.field("a", pa.binary(10)), pa.field("b", pa.string())],
        mode=pa.lib.UnionMode_DENSE,
    ),
    pa.union(
        [pa.field("a", pa.binary(10)), pa.field("b", pa.string())],
        mode=pa.lib.UnionMode_DENSE,
        type_codes=[4, 8],
    ),
    pa.union(
        [pa.field("a", pa.binary(10)), pa.field("b", pa.string())],
        mode=pa.lib.UnionMode_SPARSE,
    ),
    pa.union(
        [
            pa.field("a", pa.binary(10), nullable=False),
            pa.field("b", pa.string()),
        ],
Esempio n. 19
0
    pa.timestamp('us', tz='Europe/Paris'),
    pa.float16(),
    pa.float32(),
    pa.float64(),
    pa.decimal128(19, 4),
    pa.string(),
    pa.binary(),
    pa.binary(10),
    pa.list_(pa.int32()),
    pa.struct([
        pa.field('a', pa.int32()),
        pa.field('b', pa.int8()),
        pa.field('c', pa.string())
    ]),
    pa.union([pa.field('a', pa.binary(10)),
              pa.field('b', pa.string())],
             mode=pa.lib.UnionMode_DENSE),
    pa.union([pa.field('a', pa.binary(10)),
              pa.field('b', pa.string())],
             mode=pa.lib.UnionMode_SPARSE),
    # XXX Needs array pickling
    # pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])),
]


def test_is_boolean():
    assert types.is_boolean(pa.bool_())
    assert not types.is_boolean(pa.int8())


def test_is_integer():