def test_is_union():
    """Check that ``types.is_union`` accepts sparse and dense unions only."""
    for union_mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]:
        # Build a fresh three-member union for the mode under test.
        members = [
            pa.field('a', pa.int32()),
            pa.field('b', pa.int8()),
            pa.field('c', pa.string()),
        ]
        union_type = pa.union(members, mode=union_mode)
        assert types.is_union(union_type)

    # A list type must not be reported as a union.
    list_type = pa.list_(pa.int32())
    assert not types.is_union(list_type)
def test_is_union():
    """Verify ``types.is_union`` for both union modes and a non-union type."""

    def build_union(mode):
        # Same three fields for every mode; only the union mode varies.
        return pa.union(
            [
                pa.field('a', pa.int32()),
                pa.field('b', pa.int8()),
                pa.field('c', pa.string()),
            ],
            mode=mode,
        )

    for mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]:
        assert types.is_union(build_union(mode))

    # Negative case: a list of int32 is not a union.
    assert not types.is_union(pa.list_(pa.int32()))
def test_is_union():
    """Check ``types.is_union`` on a sparse union and on a list type."""
    child_fields = [
        pa.field('a', pa.int32()),
        pa.field('b', pa.int8()),
        pa.field('c', pa.string()),
    ]
    sparse_union = pa.union(child_fields, pa.lib.UnionMode_SPARSE)
    assert types.is_union(sparse_union)

    # A list type must not be classified as a union.
    int_list = pa.list_(pa.int32())
    assert not types.is_union(int_list)
def convertPyArrowTypeToGlueType(pyarrowType: pa.DataType) -> str:
    """Map a PyArrow data type to a Glue column type name.

    The checks run in order and the first match wins.

    Args:
        pyarrowType: The PyArrow type to translate.

    Returns:
        The type name as a string. A type that matches none of the checks
        below falls back to ``str(pyarrowType)``.
    """
    if (types.is_string(pyarrowType)
            or types.is_unicode(pyarrowType)
            or types.is_large_string(pyarrowType)
            or types.is_large_unicode(pyarrowType)):
        return 'string'
    if types.is_int64(pyarrowType) or types.is_uint64(pyarrowType):
        return 'bigint'
    if types.is_binary(pyarrowType):
        return 'binary'
    if types.is_boolean(pyarrowType):
        return 'boolean'
    if (types.is_date(pyarrowType)
            or types.is_date32(pyarrowType)
            or types.is_date64(pyarrowType)):
        return 'date'
    if types.is_decimal(pyarrowType):
        # NOTE(review): precision and scale are fixed at (16,2) regardless
        # of the input type's own precision/scale -- confirm this is intended.
        return 'decimal(16,2)'
    if types.is_float64(pyarrowType):
        # Return the name explicitly rather than relying on the
        # str(pyarrowType) fallback at the end of the function.
        return 'double'
    if types.is_float16(pyarrowType) or types.is_float32(pyarrowType):
        return 'float'
    if (types.is_int16(pyarrowType)
            or types.is_int32(pyarrowType)
            or types.is_uint16(pyarrowType)
            or types.is_uint32(pyarrowType)):
        return 'int'
    # Nested types are reported by their bare kind only; element, key and
    # value types are not included in the returned name.
    if types.is_map(pyarrowType):
        return 'map'
    if types.is_struct(pyarrowType):
        return 'struct'
    if types.is_timestamp(pyarrowType):
        return 'timestamp'
    if types.is_union(pyarrowType):
        return 'union'
    # Anything not handled above keeps PyArrow's own string form.
    return str(pyarrowType)
def _traverse(typ, counter):
    """Yield ``(path, index)`` pairs for the fields nested inside ``typ``.

    ``path`` is a tuple of field names leading to a field and ``index`` is
    the value drawn from ``counter`` for that field. Types that match none
    of the branches below yield nothing and consume nothing.

    ``counter`` is advanced with ``next()``; presumably it is an iterator of
    running column indices -- confirm against the caller.
    """
    if isinstance(typ, Schema) or types.is_struct(typ):
        for child in typ:
            prefix = (child.name,)
            yield prefix, next(counter)
            # Nested paths are reported relative to this field's name.
            yield from (
                (prefix + suffix, index)
                for suffix, index in _traverse(child.type, counter)
            )
        return

    if _is_map(typ):
        # No index is consumed for the map itself; only its value type
        # is traversed.
        yield from _traverse(typ.value_type, counter)
        return

    if types.is_list(typ):
        # Skip one index for list type, since this can never be selected
        # directly
        next(counter)
        yield from _traverse(typ.value_type, counter)
        return

    if types.is_union(typ):
        # Union types not supported, just skip the indexes
        for member in typ:
            next(counter)
            # Drain the nested traversal so the counter still advances,
            # but discard whatever it yields.
            for _ in _traverse(member, counter):
                pass
def _traverse(typ, counter):
    """Walk ``typ`` recursively and yield ``(path, index)`` for its fields.

    Each ``path`` is a tuple of field names and each ``index`` is taken from
    ``counter`` via ``next()``. List and union members consume indexes
    without being yielded; unions yield nothing at all.

    NOTE(review): ``counter`` looks like a shared running index iterator --
    confirm against the caller.
    """
    if isinstance(typ, Schema) or types.is_struct(typ):
        for member in typ:
            name_path = (member.name,)
            yield name_path, next(counter)
            # Prefix every nested result with this member's name.
            yield from (
                (name_path + tail, nested_index)
                for tail, nested_index in _traverse(member.type, counter)
            )
    elif _is_map(typ):
        # Only the value type is traversed; no index is consumed here.
        yield from _traverse(typ.value_type, counter)
    elif types.is_list(typ):
        # Skip one index for list type, since this can never be selected
        # directly
        next(counter)
        yield from _traverse(typ.value_type, counter)
    elif types.is_union(typ):
        # Union types not supported, just skip the indexes
        for member in typ:
            next(counter)
            # Exhaust the nested generator purely for its effect on
            # the counter.
            for _ in _traverse(member, counter):
                pass
def test_is_union():
    """Ensure ``types.is_union`` is true for a sparse union, false for a list."""
    union_type = pa.union(
        [
            pa.field('a', pa.int32()),
            pa.field('b', pa.int8()),
            pa.field('c', pa.string()),
        ],
        pa.lib.UnionMode_SPARSE,
    )
    assert types.is_union(union_type)

    # Negative case: list<int32> is not a union type.
    non_union = pa.list_(pa.int32())
    assert not types.is_union(non_union)