Esempio n. 1
0
def concatenate_block_managers(mgrs_indexers, axes, concat_axis: int,
                               copy: bool) -> BlockManager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                values = concat_compat(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)
                if not isinstance(values, ExtensionArray):
                    values = values.reshape(1, len(values))

            b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            b = make_block(
                _concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
                ndim=len(axes),
            )
        blocks.append(b)

    return BlockManager(blocks, axes)
Esempio n. 2
0
def concatenate_block_managers(mgrs_indexers, axes, concat_axis: int,
                               copy: bool) -> BlockManager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
                # datetimetz and categorical can have the same type but multiple
                #  dtypes, concatting does not necessarily preserve dtype
                values = concat_compat(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)

            b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            b = make_block(
                _concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
            )
        blocks.append(b)

    return BlockManager(blocks, axes)
Esempio n. 3
0
    def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:

        align_keys = align_keys or []
        aligned_args = {k: kwargs[k] for k in align_keys}

        result_arrays = []

        for i, arr in enumerate(self.arrays):

            if aligned_args:
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        #  obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            if self.ndim == 2:
                                kwargs[k] = obj.iloc[slice(i, i + 1)]._values
                            else:
                                kwargs[k] = obj.iloc[:]._values
                        else:
                            kwargs[k] = obj.iloc[:, [i]]._values
                    else:
                        # otherwise we have an ndarray
                        if obj.ndim == 2:
                            kwargs[k] = obj[[i]]

            if hasattr(arr,
                       "tz") and arr.tz is None:  # type: ignore[union-attr]
                # DatetimeArray needs to be converted to ndarray for DatetimeBlock
                arr = arr._data  # type: ignore[union-attr]
            elif arr.dtype.kind == "m" and not isinstance(arr, np.ndarray):
                # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock
                arr = arr._data  # type: ignore[union-attr]

            if self.ndim == 2:
                if isinstance(arr, np.ndarray):
                    arr = np.atleast_2d(arr)
                block = make_block(arr, placement=slice(0, 1, 1), ndim=2)
            else:
                block = make_block(arr,
                                   placement=slice(0, len(self), 1),
                                   ndim=1)

            applied = getattr(block, f)(**kwargs)
            if isinstance(applied, list):
                applied = applied[0]
            arr = applied.values
            if self.ndim == 2:
                if isinstance(arr, np.ndarray):
                    arr = arr[0, :]
            result_arrays.append(arr)

        return type(self)(result_arrays, self._axes)
Esempio n. 4
0
    def iget(self, i: int) -> SingleBlockManager:
        """
        Return the data as a SingleBlockManager.
        """
        from pandas.core.internals.managers import SingleBlockManager

        values = self.arrays[i]
        block = make_block(values, placement=slice(0, len(values)), ndim=1)

        return SingleBlockManager(block, self._axes[0])
Esempio n. 5
0
def init_ndarray(values, index, columns, dtype=None, copy=False):
    # input must be a ndarray, list, Series, index

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = [values.name]
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    # we could have a categorical type passed or coerced to 'category'
    # recast this to an arrays_to_mgr
    if is_categorical_dtype(getattr(values, "dtype",
                                    None)) or is_categorical_dtype(dtype):

        if not hasattr(values, "dtype"):
            values = prep_ndarray(values, copy=copy)
            values = values.ravel()
        elif copy:
            values = values.copy()

        index, columns = _get_axes(len(values), 1, index, columns)
        return arrays_to_mgr([values], columns, index, columns, dtype=dtype)
    elif is_extension_array_dtype(values):
        # GH#19157
        if columns is None:
            columns = [0]
        return arrays_to_mgr([values], columns, index, columns, dtype=dtype)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = prep_ndarray(values, copy=copy)

    if dtype is not None:
        if not is_dtype_equal(values.dtype, dtype):
            try:
                values = values.astype(dtype)
            except Exception as orig:
                # e.g. ValueError when trying to cast object dtype to float64
                raise ValueError("failed to cast to '{dtype}' (Exception "
                                 "was: {orig})".format(dtype=dtype,
                                                       orig=orig)) from orig

    index, columns = _get_axes(*values.shape, index=index, columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks

            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            for n in range(len(dvals_list)):
                if isinstance(dvals_list[n], np.ndarray):
                    dvals_list[n] = dvals_list[n].reshape(1, -1)

            from pandas.core.internals.blocks import make_block

            # TODO: What about re-joining object columns?
            block_values = [
                make_block(dvals_list[n], placement=[n])
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            block_values = [datelike_vals]
    else:
        block_values = [values]

    return create_block_manager_from_blocks(block_values, [columns, index])
Esempio n. 6
0
def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
    # input must be a ndarray, list, Series, index

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = [values.name]
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values, columns, index, columns, dtype=dtype)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        try:
            values = construct_1d_ndarray_preserving_na(
                values.ravel(), dtype=dtype, copy=False
            ).reshape(values.shape)
        except Exception as orig:
            # e.g. ValueError when trying to cast object dtype to float64
            raise ValueError(
                f"failed to cast to '{dtype}' (Exception was: {orig})"
            ) from orig

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(
        values.shape[0], values.shape[1], index=index, columns=columns
    )
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks

            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            for n in range(len(dvals_list)):
                if isinstance(dvals_list[n], np.ndarray):
                    dvals_list[n] = dvals_list[n].reshape(1, -1)

            from pandas.core.internals.blocks import make_block

            # TODO: What about re-joining object columns?
            block_values = [
                make_block(dvals_list[n], placement=[n], ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            block_values = [datelike_vals]
    else:
        block_values = [values]

    return create_block_manager_from_blocks(block_values, [columns, index])
Esempio n. 7
0
def concatenate_managers(mgrs_indexers, axes: List[Index], concat_axis: int,
                         copy: bool) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return concatenate_array_managers(mgrs_indexers, axes, concat_axis,
                                          copy)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)
                if not isinstance(values, ExtensionArray):
                    values = values.reshape(1, len(values))

            if blk.values.dtype == values.dtype:
                # Fast-path
                b = blk.make_block_same_class(values, placement=placement)
            else:
                b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            new_values = _concatenate_join_units(join_units,
                                                 concat_axis,
                                                 copy=copy)
            b = make_block(new_values, placement=placement, ndim=len(axes))
        blocks.append(b)

    return BlockManager(blocks, axes)
Esempio n. 8
0
def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj],
                   copy: bool, typ: str) -> Manager:
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values,
                             columns,
                             index,
                             columns,
                             dtype=dtype,
                             typ=typ)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        shape = values.shape
        flat = values.ravel()

        if not is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests passing,
            #  not clear it is correct
            # Note: we really only need _try_cast, but keeping to exposed funcs
            values = sanitize_array(flat,
                                    None,
                                    dtype=dtype,
                                    copy=copy,
                                    raise_cast_failure=True)
        else:
            try:
                values = construct_1d_ndarray_preserving_na(flat,
                                                            dtype=dtype,
                                                            copy=False)
            except Exception as err:
                # e.g. ValueError when trying to cast object dtype to float64
                msg = f"failed to cast to '{dtype}' (Exception was: {err})"
                raise ValueError(msg) from err
        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0],
                               values.shape[1],
                               index=index,
                               columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks

            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            dvals_list = [ensure_block_shape(dval, 2) for dval in dvals_list]

            # TODO: What about re-joining object columns?
            dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list]
            block_values = [
                make_block(dvals_list[n], placement=[n], ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            datelike_vals = maybe_squeeze_dt64tz(datelike_vals)
            block_values = [datelike_vals]
    else:
        block_values = [maybe_squeeze_dt64tz(values)]

    return create_block_manager_from_blocks(block_values, [columns, index])
Esempio n. 9
0
def concatenate_block_managers(mgrs_indexers, axes: List[Index],
                               concat_axis: int, copy: bool) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    if isinstance(mgrs_indexers[0][0], ArrayManager):

        if concat_axis == 1:
            # TODO for now only fastpath without indexers
            mgrs = [t[0] for t in mgrs_indexers]
            arrays = [
                concat_compat([mgrs[i].arrays[j] for i in range(len(mgrs))],
                              axis=0) for j in range(len(mgrs[0].arrays))
            ]
            return ArrayManager(arrays, [axes[1], axes[0]])
        elif concat_axis == 0:
            mgrs = [t[0] for t in mgrs_indexers]
            arrays = list(
                itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))
            return ArrayManager(arrays, [axes[1], axes[0]])

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers)
        for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif _is_uniform_join_units(join_units):
            blk = join_units[0].block
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals)
                if not isinstance(values, ExtensionArray):
                    values = values.reshape(1, len(values))

            if blk.values.dtype == values.dtype:
                # Fast-path
                b = blk.make_block_same_class(values, placement=placement)
            else:
                b = make_block(values, placement=placement, ndim=blk.ndim)
        else:
            b = make_block(
                _concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
                ndim=len(axes),
            )
        blocks.append(b)

    return BlockManager(blocks, axes)