コード例 #1
0
ファイル: test_schema.py プロジェクト: sighingnow/arrow
def test_schema_merge():
    """Exercise ``pa.unify_schemas`` on compatible and conflicting schemas.

    Three schemas with overlapping and disjoint fields must merge into one
    schema listing each field once, in first-seen order. Two schemas that
    declare ``foo`` with different integer widths must be rejected with
    ``pa.ArrowInvalid``.
    """
    # Fields shared between the input schemas and the expected result.
    foo_int32 = pa.field('foo', pa.int32())
    foo_int64 = pa.field('foo', pa.int64())
    bar_field = pa.field('bar', pa.string())
    baz_field = pa.field('baz', pa.list_(pa.int8()))
    qux_field = pa.field('qux', pa.bool_())
    quux_field = pa.field('quux', pa.dictionary(pa.int32(), pa.string()))

    base = pa.schema([foo_int32, bar_field, baz_field])
    overlapping = pa.schema([foo_int32, qux_field])
    disjoint = pa.schema([quux_field])
    # Same field names as ``overlapping`` but ``foo`` is int64, not int32.
    conflicting = pa.schema([foo_int64, qux_field])

    expected = pa.schema([
        foo_int32,
        bar_field,
        baz_field,
        qux_field,
        quux_field,
    ])

    merged = pa.unify_schemas([base, overlapping, disjoint])
    assert merged.equals(expected)

    with pytest.raises(pa.ArrowInvalid):
        pa.unify_schemas([overlapping, conflicting])

    # ARROW-14002: Try with tuple instead of list
    merged = pa.unify_schemas((base, overlapping, disjoint))
    assert merged.equals(expected)
コード例 #2
0
ファイル: dataset.py プロジェクト: astrojams1/cleanstreets
def _union_dataset(children, schema=None, **kwargs):
    if any(v is not None for v in kwargs.values()):
        raise ValueError(
            "When passing a list of Datasets, you cannot pass any additional "
            "arguments")

    if schema is None:
        # unify the children datasets' schemas
        schema = pa.unify_schemas([child.schema for child in children])

    # create datasets with the requested schema
    children = [child.replace_schema(schema) for child in children]

    return UnionDataset(schema, children)