Example #1
0
def pandas_dataframe_resolver(obj, resolver):
    meta = obj.meta
    columns = from_json(meta['columns_'])
    if not columns:
        return pd.DataFrame()

    names = []
    # ensure zero-copy
    blocks = []
    index_size = 0
    for idx, _ in enumerate(columns):
        names.append(from_json(meta['__values_-key-%d' % idx]))
        np_value = resolver.run(obj.member('__values_-value-%d' % idx))
        index_size = len(np_value)
        # ndim: 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
        if BlockPlacement:
            placement = BlockPlacement(slice(idx, idx + 1, 1))
        else:
            placement = slice(idx, idx + 1, 1)
        if DatetimeArray is not None and isinstance(np_value, DatetimeArray):
            values = np_value.reshape(1, -1)
            setattr(values, '__vineyard_ref',
                    getattr(np_value, '__vineyard_ref', None))
            block = DatetimeLikeBlock(values, placement, ndim=2)
        else:
            values = np.expand_dims(np_value, 0).view(ndarray)
            setattr(values, '__vineyard_ref',
                    getattr(np_value, '__vineyard_ref', None))
            block = Block(values, placement, ndim=2)
        blocks.append(block)
    if 'index_' in meta:
        index = resolver.run(obj.member('index_'))
    else:
        index = pd.RangeIndex(index_size)
    return pd.DataFrame(BlockManager(blocks, [ensure_index(names), index]))
Example #2
0
def _concat_managers_axis0(mgrs_indexers, axes: list[Index],
                           copy: bool) -> BlockManager:
    """
    concat_managers specialized to concat_axis=0, with reindexing already
    having been done in _maybe_reindex_columns_na_proxy.
    """
    had_reindexers = {
        i: len(mgrs_indexers[i][1]) > 0
        for i in range(len(mgrs_indexers))
    }
    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    mgrs = [x[0] for x in mgrs_indexers]

    offset = 0
    blocks = []
    for i, mgr in enumerate(mgrs):
        # If we already reindexed, then we definitely don't need another copy
        made_copy = had_reindexers[i]

        for blk in mgr.blocks:
            if made_copy:
                nb = blk.copy(deep=False)
            elif copy:
                nb = blk.copy()
            else:
                # by slicing instead of copy(deep=False), we get a new array
                #  object, see test_concat_copy
                nb = blk.getitem_block(slice(None))
            nb._mgr_locs = nb._mgr_locs.add(offset)
            blocks.append(nb)

        offset += len(mgr.items)
    return BlockManager(tuple(blocks), axes)
Example #3
0
def _iter_block_pairs(left: BlockManager,
                      right: BlockManager) -> Iterator[BlockPairInfo]:
    # At this point we have already checked the parent DataFrames for
    #  assert rframe._indexed_same(lframe)

    for blk in left.blocks:
        locs = blk.mgr_locs
        blk_vals = blk.values

        left_ea = blk_vals.ndim == 1

        rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)

        # Assertions are disabled for performance, but should hold:
        # if left_ea:
        #    assert len(locs) == 1, locs
        #    assert len(rblks) == 1, rblks
        #    assert rblks[0].shape[0] == 1, rblks[0].shape

        for rblk in rblks:
            right_ea = rblk.values.ndim == 1

            lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea)
            info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk)
            yield info
Example #4
0
def pandas_dataframe_resolver(obj, resolver):
    meta = obj.meta
    columns = from_json(meta['columns_'])
    if not columns:
        return pd.DataFrame()
    # ensure zero-copy
    blocks = []
    index_size = 0
    for idx, name in enumerate(columns):
        np_value = resolver.run(obj.member('__values_-value-%d' % idx))
        index_size = len(np_value)
        # ndim: 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
        if BlockPlacement:
            placement = BlockPlacement(slice(idx, idx + 1, 1))
        else:
            placement = slice(idx, idx + 1, 1)
        values = np.expand_dims(np_value, 0).view(ndarray)
        setattr(values, '__vineyard_ref',
                getattr(np_value, '__vineyard_ref', None))
        blocks.append(Block(values, placement, ndim=2))
    if 'index_' in meta:
        index = resolver.run(obj.member('index_'))
    else:
        index = pd.RangeIndex(index_size)
    return pd.DataFrame(BlockManager(blocks, [pd.Index(columns), index]))
Example #5
0
def concatenate_block_managers(mgrs_indexers, axes, concat_axis: int,
                               copy: bool) -> BlockManager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                values = concat_compat(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)
                if not isinstance(values, ExtensionArray):
                    values = values.reshape(1, len(values))

            b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            b = make_block(
                _concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
                ndim=len(axes),
            )
        blocks.append(b)

    return BlockManager(blocks, axes)
Example #6
0
def concatenate_block_managers(mgrs_indexers, axes, concat_axis: int,
                               copy: bool) -> BlockManager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
                # datetimetz and categorical can have the same type but multiple
                #  dtypes, concatting does not necessarily preserve dtype
                values = concat_compat(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)

            b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            b = make_block(
                _concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
            )
        blocks.append(b)

    return BlockManager(blocks, axes)
Example #7
0
def dataframe_resolver(obj, resolver):
    meta = obj.meta
    columns = json.loads(meta['columns_'])
    if not columns:
        return pd.DataFrame()
    # ensure zero-copy
    blocks = []
    index_size = 0
    for idx, name in enumerate(columns):
        np_value = resolver.run(obj.member('__values_-value-%d' % idx))
        blocks.append(
            Block(np.expand_dims(np_value, 0), slice(idx, idx + 1, 1)))
        index_size = len(np_value)
    return pd.DataFrame(BlockManager(blocks, [columns, np.arange(index_size)]))
Example #8
0
def pandas_dataframe_resolver(obj, resolver):
    meta = obj.meta
    columns = from_json(meta['columns_'])
    index = resolver.run(obj.member('index_'))
    if not columns:
        return pd.DataFrame()
    # ensure zero-copy
    blocks = []
    for idx, name in enumerate(columns):
        np_value = resolver.run(obj.member('__values_-value-%d' % idx))
        # ndim: 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
        blocks.append(
            Block(np.expand_dims(np_value, 0), slice(idx, idx + 1, 1), ndim=2))
    return pd.DataFrame(BlockManager(blocks, [columns, index]))
Example #9
0
    def execute(cls, ctx, op):
        if vineyard is None:
            raise RuntimeError('vineyard is not available')
        client = vineyard.connect(op.vineyard_socket)

        # chunk has no tensor chunk
        df_chunk = client.get(op.object_id)
        if not df_chunk.columns:
            ctx[op.outputs[0].key] = pd.DataFrame()
        else:
            # ensure zero-copy
            blocks = []
            index_size = 0
            for idx, name in enumerate(df_chunk.columns):
                value = df_chunk[name].numpy()
                blocks.append(
                    Block(np.expand_dims(value, 0), slice(idx, idx + 1, 1)))
                index_size = len(value)
            ctx[op.outputs[0].key] = pd.DataFrame(
                BlockManager(
                    blocks,
                    [df_chunk.columns, np.arange(index_size)]))
Example #10
0
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1 and not join_units[0].indexers:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, blk.ndim)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, concat_axis, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block(values, placement=placement, ndim=len(axes))

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
Example #11
0
def concatenate_managers(mgrs_indexers, axes: list[Index], concat_axis: int,
                         copy: bool) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis,
                                           copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #    # caller is responsible for ensuring this
    #    indexers = tup[1]
    #    assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    # Assertion disabled for performance
    # assert all(not x[1] for x in mgrs_indexers)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr) for mgr, _ in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block
        # Assertion disabled for performance
        # assert len(join_units) == len(mgrs_indexers)

        if len(join_units) == 1:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
Example #12
0
def concatenate_block_managers(mgrs_indexers, axes: List[Index],
                               concat_axis: int, copy: bool) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    if isinstance(mgrs_indexers[0][0], ArrayManager):

        if concat_axis == 1:
            # TODO for now only fastpath without indexers
            mgrs = [t[0] for t in mgrs_indexers]
            arrays = [
                concat_compat([mgrs[i].arrays[j] for i in range(len(mgrs))],
                              axis=0) for j in range(len(mgrs[0].arrays))
            ]
            return ArrayManager(arrays, [axes[1], axes[0]])
        elif concat_axis == 0:
            mgrs = [t[0] for t in mgrs_indexers]
            arrays = list(
                itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))
            return ArrayManager(arrays, [axes[1], axes[0]])

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)
                if not isinstance(values, ExtensionArray):
                    values = values.reshape(1, len(values))

            if blk.values.dtype == values.dtype:
                # Fast-path
                b = blk.make_block_same_class(values, placement=placement)
            else:
                b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            b = make_block(
                _concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
                ndim=len(axes),
            )
        blocks.append(b)

    return BlockManager(blocks, axes)