def quantile(
    self,
    *,
    qs: Float64Index,
    axis: int = 0,
    transposed: bool = False,
    interpolation="linear",
) -> ArrayManager:
    """
    Run ``quantile_compat`` over every stored array and build a new manager.

    Parameters
    ----------
    qs : Float64Index
        Handed to ``quantile_compat`` for each array; also becomes the first
        axis of the result.
    axis : int, default 0
        Only ``axis == 1`` is accepted (enforced with an assert).
    transposed : bool, default False
        Not read by this implementation.
    interpolation : str, default "linear"
        Forwarded unchanged to ``quantile_compat``.

    Returns
    -------
    ArrayManager
        Built via ``type(self)`` with axes ``[qs, self._axes[1]]``.
    """
    # error: Value of type variable "ArrayLike" of "ensure_block_shape" cannot be
    # "Union[ndarray, ExtensionArray]"
    as_2d = [
        ensure_block_shape(col, 2)  # type: ignore[type-var]
        for col in self.arrays
    ]
    assert axis == 1

    # error: Value of type variable "ArrayLike" of "quantile_compat" cannot be
    # "object"
    computed = [
        quantile_compat(col, qs, interpolation, axis=axis)  # type: ignore[type-var]
        for col in as_2d
    ]

    # A 2D result must consist of exactly one row; unwrap it back to 1D.
    squeezed = []
    for res in computed:
        if res.ndim == 2:
            assert res.shape[0] == 1, res.shape
            res = res[0]
        squeezed.append(res)

    return type(self)(squeezed, [qs, self._axes[1]])
def _concatenate_join_units(
    join_units: list[JoinUnit], concat_axis: int, copy: bool
) -> ArrayLike:
    """
    Combine the reindexed values of ``join_units`` into a single array.

    Parameters
    ----------
    join_units : list of JoinUnit
    concat_axis : int
        Axis passed to ``concat_compat`` in the general case.
    copy : bool
        Only consulted when there is exactly one join unit.

    Returns
    -------
    ArrayLike

    Raises
    ------
    AssertionError
        If ``concat_axis`` is 0 and more than one join unit is passed.
    """
    if concat_axis == 0 and len(join_units) > 1:
        # Concatenating join units along ax0 is handled in _merge_blocks.
        raise AssertionError("Concatenating join units along axis0")

    empty_dtype = _get_empty_dtype(join_units)

    # A block whose dtype kind is "V" counts as a "none block" here.
    found_void = any(ju.block.dtype.kind == "V" for ju in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, found_void)

    pieces = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(pieces) == 1:
        # Only one block, nothing to concatenate.
        lone = pieces[0]
        # non-reindexed (=not yet copied) ndarrays are made into a view in
        # JoinUnit.get_reindexed_values, so an ndarray is only copied when it
        # has a base; anything that is not an ndarray is always copied.
        if copy and (not isinstance(lone, np.ndarray) or lone.base is not None):
            lone = lone.copy()
        return lone

    if not any(is_1d_only_ea_dtype(piece.dtype) for piece in pieces):
        return concat_compat(pieces, axis=concat_axis)

    # TODO(EA2D): special case not needed if all EAs used HybridBlocks
    # NB: we are still assuming here that Hybrid blocks have shape (1, N)
    # concatting with at least one EA means we are concatting a single column
    # the non-EA values are 2D arrays with shape (1, n)

    # error: No overload variant of "__getitem__" of "ExtensionArray" matches
    # argument type "Tuple[int, slice]"
    as_columns = [
        piece
        if is_1d_only_ea_dtype(piece.dtype)
        else piece[0, :]  # type: ignore[call-overload]
        for piece in pieces
    ]
    joined = concat_compat(as_columns, axis=0, ea_compat_axis=True)
    return ensure_block_shape(joined, 2)
def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
    """
    Run a grouped reduction on each array and collect the results.

    Parameters
    ----------
    func : grouped reduction function
        Called once per array, after the array has been passed through
        ``ensure_block_shape(..., ndim=2)``.
    ignore_failures : bool, default False
        When True, arrays for which ``func`` raises TypeError or
        NotImplementedError are left out of the result; when False the
        exception propagates.

    Returns
    -------
    ArrayManager
    """
    reduced: list[np.ndarray] = []
    kept_positions: list[int] = []

    for pos, column in enumerate(self.arrays):
        # grouped_reduce functions all expect 2D arrays
        column_2d = ensure_block_shape(column, ndim=2)
        try:
            res = func(column_2d)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                continue
            raise

        if res.ndim == 2:
            # reverse of ensure_block_shape
            assert res.shape[0] == 1
            res = res[0]

        reduced.append(res)
        kept_positions.append(pos)

    if reduced:
        index = Index(range(reduced[0].shape[0]))
    else:
        index = Index([None])  # placeholder

    columns = self.items
    if ignore_failures:
        # keep only the labels of the arrays that were reduced successfully
        columns = columns[np.array(kept_positions, dtype="int64")]

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(reduced, [index, columns])  # type: ignore[arg-type]
def _concatenate_join_units(
    join_units: List[JoinUnit], concat_axis: int, copy: bool
) -> ArrayLike:
    """
    Join the reindexed values of several join units along ``concat_axis``.

    Parameters
    ----------
    join_units : list of JoinUnit
    concat_axis : int
    copy : bool
        Only consulted when a single join unit is passed.

    Returns
    -------
    ArrayLike

    Raises
    ------
    AssertionError
        If ``concat_axis`` is 0 and there is more than one join unit.
    """

    def _is_1d_ea(arr) -> bool:
        # True for ExtensionArrays that are one-dimensional
        return isinstance(arr, ExtensionArray) and arr.ndim == 1

    if concat_axis == 0 and len(join_units) > 1:
        # Concatenating join units along ax0 is handled in _merge_blocks.
        raise AssertionError("Concatenating join units along axis0")

    empty_dtype = _get_empty_dtype(join_units)

    missing_block = any(ju.block is None for ju in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, missing_block)

    parts = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(parts) == 1:
        # Only one block, nothing to concatenate.
        result = parts[0]
        if not copy:
            return result
        if isinstance(result, np.ndarray) and result.base is None:
            # non-reindexed (=not yet copied) arrays are made into a view
            # in JoinUnit.get_reindexed_values; without a base no copy is made
            return result
        return result.copy()

    if any(_is_1d_ea(part) for part in parts):
        # concatting with at least one EA means we are concatting a single column
        # the non-EA values are 2D arrays with shape (1, n)

        # error: Invalid index type "Tuple[int, slice]" for
        # "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
        flattened = [
            part if _is_1d_ea(part) else part[0, :]  # type: ignore[index]
            for part in parts
        ]
        result = concat_compat(flattened, axis=0, ea_compat_axis=True)
        return ensure_block_shape(result, 2)

    return concat_compat(parts, axis=concat_axis)
def make_block(
    values, placement, klass=None, ndim=None, dtype: Dtype | None = None
) -> Block:
    """
    Pseudo-public counterpart of blocks.new_block.

    Downstream libraries are asked to call this function instead of any
    fully-internal API, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    dtype = None if dtype is None else pandas_dtype(dtype)

    values, dtype = extract_pandas_array(values, dtype, ndim)

    # GH-44681 changed PeriodArray to be stored in the 2D
    # NDArrayBackedExtensionBlock instead of ExtensionBlock
    # -> still allow ExtensionBlock to be passed in this case for back compat
    legacy_period_request = klass is ExtensionBlock and is_period_dtype(values.dtype)
    if legacy_period_request:
        klass = None

    if klass is None:
        # pick the block class from the dtype, falling back to the values' own
        dtype = dtype or values.dtype
        klass = get_block_type(dtype)
    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # pyarrow calls get here
        values = DatetimeArray._simple_new(values, dtype=dtype)

    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    ndim = maybe_infer_ndim(values, placement, ndim)

    current_dtype = values.dtype
    if is_datetime64tz_dtype(current_dtype) or is_period_dtype(current_dtype):
        # GH#41168 ensure we can pass 1D dt64tz values
        # More generally, any EA dtype that isn't is_1d_only_ea_dtype
        unwrapped = extract_array(values, extract_numpy=True)
        values = ensure_block_shape(unwrapped, ndim)

    check_ndim(values, placement, ndim)
    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=ndim, placement=placement)
def quantile(
    self,
    *,
    qs: Float64Index,
    axis: int = 0,
    transposed: bool = False,
    interpolation="linear",
) -> ArrayManager:
    """
    Compute quantiles array by array via ``quantile_compat``.

    Parameters
    ----------
    qs : Float64Index
        Passed to ``quantile_compat``; also the first axis of the result.
    axis : int, default 0
        Must equal 1; any other value trips an assert.
    transposed : bool, default False
        Accepted but not read here.
    interpolation : str, default "linear"
        Passed through to ``quantile_compat``.

    Returns
    -------
    ArrayManager
        Constructed with ``type(self)`` and axes ``[qs, self._axes[1]]``.
    """
    reshaped = [ensure_block_shape(item, 2) for item in self.arrays]
    assert axis == 1

    raw_results = [
        quantile_compat(item, qs, interpolation, axis=axis) for item in reshaped
    ]

    # 2D results are expected to have a single row, which is taken out so
    # that the manager stores the row itself.
    final = []
    for out in raw_results:
        if out.ndim == 2:
            assert out.shape[0] == 1, out.shape
            out = out[0]
        final.append(out)

    result_axes = [qs, self._axes[1]]
    return type(self)(final, result_axes)
def make_block(
    values, placement, klass=None, ndim=None, dtype: Dtype | None = None
) -> Block:
    """
    Pseudo-public counterpart of blocks.new_block.

    Downstream libraries are asked to call this function instead of any
    fully-internal API, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    dtype = None if dtype is None else pandas_dtype(dtype)

    values, dtype = extract_pandas_array(values, dtype, ndim)

    # Only values rebuilt as a DatetimeArray below get reshaped afterwards.
    rebuilt_as_dt64tz = False
    if klass is None:
        dtype = dtype or values.dtype
        klass = get_block_type(values, dtype)
    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # pyarrow calls get here
        values = DatetimeArray._simple_new(values, dtype=dtype)
        rebuilt_as_dt64tz = True

    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    ndim = maybe_infer_ndim(values, placement, ndim)
    if rebuilt_as_dt64tz:
        values = ensure_block_shape(values, ndim)

    check_ndim(values, placement, ndim)
    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=ndim, placement=placement)
def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T:
    """
    Call the Block method named ``f`` on each array, wrapped in a temporary block.

    Parameters
    ----------
    f : str
        Name of the method looked up on the temporary block with ``getattr``.
    align_keys : list of str, optional
        Keys of ``kwargs`` whose values are narrowed to the part matching
        the array currently being processed.
    swap_axis : bool, default True
        When True, ``self.ndim == 2`` and "axis" is given, the axis keyword
        is flipped (0 becomes 1, anything else becomes 0).
    **kwargs
        Forwarded to the block method.

    Returns
    -------
    Same type as ``self``, holding the resulting arrays and ``self._axes``.
    """
    # switch axis to follow BlockManager logic
    if swap_axis and "axis" in kwargs and self.ndim == 2:
        kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

    align_keys = align_keys or []
    to_align = {key: kwargs[key] for key in align_keys}

    outputs = []

    for pos, column in enumerate(self.arrays):

        for key, obj in to_align.items():
            if not isinstance(obj, (ABCSeries, ABCDataFrame)):
                # otherwise we have an ndarray
                if obj.ndim == 2:
                    kwargs[key] = obj[[pos]]
            # The caller is responsible for ensuring that
            #  obj.axes[-1].equals(self.items)
            elif obj.ndim != 1:
                kwargs[key] = obj.iloc[:, [pos]]._values
            elif self.ndim == 2:
                kwargs[key] = obj.iloc[slice(pos, pos + 1)]._values
            else:
                kwargs[key] = obj.iloc[:]._values

        # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
        # attribute "tz"
        if hasattr(column, "tz") and column.tz is None:  # type: ignore[union-attr]
            # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock

            # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
            # attribute "_data"
            column = column._data  # type: ignore[union-attr]
        elif column.dtype.kind == "m" and not isinstance(column, np.ndarray):
            # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock

            # error: "ExtensionArray" has no attribute "_data"
            column = column._data  # type: ignore[attr-defined]

        if self.ndim == 2:
            column = ensure_block_shape(column, 2)
            block = new_block(column, placement=slice(0, 1, 1), ndim=2)
        else:
            block = new_block(column, placement=slice(0, len(self), 1), ndim=1)

        method = getattr(block, f)
        applied = method(**kwargs)
        if isinstance(applied, list):
            # only the first element of a list result is used
            applied = applied[0]

        out = applied.values
        if self.ndim == 2 and out.ndim == 2:
            # 2D for np.ndarray or DatetimeArray/TimedeltaArray
            assert len(out) == 1
            # error: Invalid index type "Tuple[int, slice]" for
            # "Union[ndarray, ExtensionArray]"; expected type
            # "Union[int, slice, ndarray]"
            out = out[0, :]  # type: ignore[index]
        outputs.append(out)

    return type(self)(outputs, self._axes)
def ndarray_to_mgr(
    values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str
) -> Manager:
    """
    Build a Manager from array-like ``values``; used in DataFrame.__init__.

    Parameters
    ----------
    values : ndarray, list, Series, Index or ExtensionArray
    index, columns : axis labels or None
        For a Series input, a missing ``index`` is taken from the Series and
        missing ``columns`` from its name (when it has one).
    dtype : DtypeObj or None
    copy : bool
    typ : str
        On the 2D path, ``"array"`` produces an ArrayManager; any other value
        goes through ``create_block_manager_from_blocks``.

    Returns
    -------
    Manager
    """
    if isinstance(values, ABCSeries):
        if columns is None and values.name is not None:
            columns = Index([values.name])

        if index is not None:
            values = values.reindex(index)
        else:
            index = values.index

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    # if the array preparation does a copy -> avoid this for ArrayManager,
    # since the copy is done on conversion to 1D arrays
    if typ == "array":
        copy_on_sanitize = False
    else:
        copy_on_sanitize = copy

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype):
        # GH#19157
        if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            # error: No overload variant of "__getitem__" of "ExtensionArray"
            # matches argument type "Tuple[slice, int]"
            values = [
                values[:, n]  # type: ignore[call-overload]
                for n in range(values.shape[1])
            ]
        else:
            values = [values]

        if columns is not None:
            columns = ensure_index(columns)
        else:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)

    if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ, PeriodDtype
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)
    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy_on_sanitize)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        original_shape = values.shape
        flattened = values.ravel()

        # GH#40110 see similar check inside sanitize_array
        raise_on_failure = not is_integer_dtype(dtype) or values.dtype.kind != "f"

        values = sanitize_array(
            flattened,
            None,
            dtype=dtype,
            copy=copy_on_sanitize,
            raise_cast_failure=raise_on_failure,
        )
        values = values.reshape(original_shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    nrows, ncols = values.shape[0], values.shape[1]
    index, columns = _get_axes(nrows, ncols, index=index, columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, pos])
                )
                for pos in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, pos] for pos in range(values.shape[1])]

        if copy:
            arrays = [column.copy() for column in arrays]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    block_values = None
    if dtype is None and is_object_dtype(values.dtype):
        obj_columns = list(values)
        inferred = [maybe_infer_to_datetimelike(col) for col in obj_columns]
        # don't convert (and copy) the objects if no type inference occurs
        if any(before is not after for before, after in zip(obj_columns, inferred)):
            reshaped = [ensure_block_shape(col, 2) for col in inferred]
            block_values = [
                new_block_2d(col, placement=BlockPlacement(pos))
                for pos, col in enumerate(reshaped)
            ]

    if block_values is None:
        # nothing was inferred (or inference was not attempted): one block
        # holding all of the values
        bp = BlockPlacement(slice(len(columns)))
        block_values = [new_block_2d(values, placement=bp)]

    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(
        block_values, [columns, index], verify_integrity=False
    )
def ndarray_to_mgr(
    values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str
) -> Manager:
    """
    Build a Manager from array-like ``values``; used in DataFrame.__init__.

    Parameters
    ----------
    values : ndarray, list, Series, Index or ExtensionArray
    index, columns : axis labels or None
        For a Series input, a missing ``index`` is taken from the Series and
        missing ``columns`` from its name (when it has one).
    dtype : DtypeObj or None
    copy : bool
    typ : str
        Forwarded to ``arrays_to_mgr`` on the extension-array path.

    Returns
    -------
    Manager

    Raises
    ------
    ValueError
        If casting to an integer ``dtype`` fails for any reason.
    """
    if isinstance(values, ABCSeries):
        if columns is None and values.name is not None:
            columns = Index([values.name])

        if index is not None:
            values = values.reindex(index)
        else:
            index = values.index

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
        # GH#19157
        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        original_shape = values.shape
        flattened = values.ravel()

        if is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests passing,
            #  not clear it is correct
            try:
                values = construct_1d_ndarray_preserving_na(
                    flattened, dtype=dtype, copy=False
                )
            except Exception as err:
                # e.g. ValueError when trying to cast object dtype to float64
                msg = f"failed to cast to '{dtype}' (Exception was: {err})"
                raise ValueError(msg) from err
        else:
            # Note: we really only need _try_cast, but keeping to exposed funcs
            values = sanitize_array(
                flattened, None, dtype=dtype, copy=copy, raise_cast_failure=True
            )
        values = values.reshape(original_shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    nrows, ncols = values.shape[0], values.shape[1]
    index, columns = _get_axes(nrows, ncols, index=index, columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is not None or not is_object_dtype(values.dtype):
        block_values = [maybe_squeeze_dt64tz(values)]
    elif values.ndim == 2 and values.shape[0] != 1:
        # transpose and separate blocks
        inferred = [maybe_infer_to_datetimelike(row) for row in values]
        reshaped = [ensure_block_shape(item, 2) for item in inferred]

        # TODO: What about re-joining object columns?
        squeezed = [maybe_squeeze_dt64tz(item) for item in reshaped]
        block_values = [
            new_block(item, placement=pos, ndim=2)
            for pos, item in enumerate(squeezed)
        ]
    else:
        datelike_vals = maybe_infer_to_datetimelike(values)
        block_values = [maybe_squeeze_dt64tz(datelike_vals)]

    return create_block_manager_from_blocks(block_values, [columns, index])
def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_block):
    """
    Build a block of the kind named by ``typestr`` (test helper).

    Supported typestr:

        * float, f8, f4, f2
        * int, i8, i4, i2, i1
        * uint, u8, u4, u2, u1
        * complex, c16, c8
        * bool
        * object, string, O
        * datetime, dt, M8[ns], M8[ns, tz]
        * timedelta, td, m8[ns]
        * sparse (SparseArray with fill_value=0.0)
        * sparse_na (SparseArray with fill_value=np.nan)
        * category, category2

    Any other ``typestr`` raises ValueError. The values are handed to
    ``maker`` together with the placement and ``ndim=len(shape)``.
    """
    real_kinds = (
        "float",
        "f8",
        "f4",
        "f2",
        "int",
        "i8",
        "i4",
        "i2",
        "i1",
        "uint",
        "u8",
        "u4",
        "u2",
        "u1",
    )

    placement = BlockPlacement(placement)
    num_items = len(placement)

    if item_shape is None:
        item_shape = (N,)

    shape = (num_items,) + item_shape
    mat = get_numeric_mat(shape)

    if typestr in real_kinds:
        values = mat.astype(typestr) + num_offset
    elif typestr in ("complex", "c16", "c8"):
        values = 1.0j * (mat.astype(typestr) + num_offset)
    elif typestr in ("object", "string", "O"):
        labels = [f"A{i:d}" for i in mat.ravel() + num_offset]
        values = np.reshape(labels, shape)
    elif typestr in ("b", "bool"):
        values = np.ones(shape, dtype=np.bool_)
    elif typestr in ("datetime", "dt", "M8[ns]"):
        values = (mat * 1e9).astype("M8[ns]")
    elif typestr.startswith("M8[ns"):
        # datetime with tz
        match = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
        assert match is not None, f"incompatible typestr -> {typestr}"
        tz = match.groups()[0]
        assert num_items == 1, "must have only 1 num items for a tz-aware"
        tz_values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data
        values = ensure_block_shape(tz_values, ndim=len(shape))
    elif typestr in ("timedelta", "td", "m8[ns]"):
        values = (mat * 1).astype("m8[ns]")
    elif typestr == "category":
        values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
    elif typestr == "category2":
        values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
    elif typestr in ("sparse", "sparse_na"):
        # FIXME: doesn't support num_rows != 10
        assert shape[-1] == 10
        assert all(s == 1 for s in shape[:-1])
        fill_value = np.nan if typestr.endswith("_na") else 0.0
        values = SparseArray(
            [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
            fill_value=fill_value,
        )
        # in-place add on a view of the sparse values
        sp_view = values.sp_values.view()
        sp_view += num_offset - 1
    else:
        raise ValueError(f'Unsupported typestr: "{typestr}"')

    return maker(values, placement=placement, ndim=len(shape))
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Merge several block managers into a single one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
        Axes of the resulting manager.
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    first_mgr = mgrs_indexers[0][0]
    if isinstance(first_mgr, ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    per_manager_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    combined_plan = _combine_concat_plans(per_manager_plans, concat_axis)

    new_blocks = []
    for placement, join_units in combined_plan:
        head = join_units[0]
        blk = head.block

        if len(join_units) == 1 and not head.indexers:
            # a single unit without indexers: reuse its values directly
            values = blk.values.copy() if copy else blk.values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            unit_values = [ju.block.values for ju in join_units]

            if blk.is_extension:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(unit_values, axis=1)
                values = ensure_block_shape(values, blk.ndim)
            else:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(unit_values, axis=blk.ndim - 1)

            values = ensure_wrapped_if_datetimelike(values)
            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, concat_axis, copy=copy)
            fastpath = False

        if fastpath:
            new_blk = blk.make_block_same_class(values, placement=placement)
        else:
            new_blk = new_block(values, placement=placement, ndim=len(axes))
        new_blocks.append(new_blk)

    return BlockManager(tuple(new_blocks), axes)
def ndarray_to_mgr(
    values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str
) -> Manager:
    """
    Build a Manager from array-like ``values``; used in DataFrame.__init__.

    Parameters
    ----------
    values : ndarray, list, Series, Index or ExtensionArray
    index, columns : axis labels or None
        For a Series input, a missing ``index`` is taken from the Series and
        missing ``columns`` from its name (when it has one).
    dtype : DtypeObj or None
    copy : bool
    typ : str
        On the 2D path, ``"array"`` produces an ArrayManager; any other value
        goes through ``create_block_manager_from_blocks``.

    Returns
    -------
    Manager
    """
    if isinstance(values, ABCSeries):
        if columns is None and values.name is not None:
            columns = Index([values.name])

        if index is not None:
            values = values.reindex(index)
        else:
            index = values.index

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype):
        # GH#19157
        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)

    if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)
    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        original_shape = values.shape
        flattened = values.ravel()

        if is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests passing,
            #  not clear it is correct
            try:
                values = construct_1d_ndarray_preserving_na(
                    flattened, dtype=dtype, copy=False
                )
            except IntCastingNaNError:
                # following Series, we ignore the dtype and retain floating
                # values instead of casting nans to meaningless ints
                pass
        else:
            # Note: we really only need _try_cast, but keeping to exposed funcs
            values = sanitize_array(
                flattened, None, dtype=dtype, copy=copy, raise_cast_failure=True
            )

        values = values.reshape(original_shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    nrows, ncols = values.shape[0], values.shape[1]
    index, columns = _get_axes(nrows, ncols, index=index, columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, pos].copy())
                )
                for pos in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, pos].copy() for pos in range(values.shape[1])]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is not None or not is_object_dtype(values.dtype):
        whole = new_block(values, placement=slice(len(columns)), ndim=2)
        block_values = [whole]
    elif values.ndim == 2 and values.shape[0] != 1:
        # transpose and separate blocks
        inferred = [maybe_infer_to_datetimelike(row) for row in values]
        reshaped = [ensure_block_shape(item, 2) for item in inferred]

        # TODO: What about re-joining object columns?
        block_values = [
            new_block(item, placement=pos, ndim=2)
            for pos, item in enumerate(reshaped)
        ]
    else:
        datelike_vals = maybe_infer_to_datetimelike(values)
        whole = new_block(datelike_vals, placement=slice(len(columns)), ndim=2)
        block_values = [whole]

    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(block_values, [columns, index])
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Merge several block managers into a single one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
        Axes of the resulting manager.
    concat_axis : int
        Axis 0 is delegated to ``_concat_managers_axis0``.
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    first_mgr = mgrs_indexers[0][0]
    if isinstance(first_mgr, ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #    # caller is responsible for ensuring this
    #    indexers = tup[1]
    #    assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    # Assertion disabled for performance
    # assert all(not x[1] for x in mgrs_indexers)

    per_manager_plans = [_get_mgr_concatenation_plan(mgr) for mgr, _ in mgrs_indexers]
    combined_plan = _combine_concat_plans(per_manager_plans)

    new_blocks = []
    for placement, join_units in combined_plan:
        head = join_units[0]
        blk = head.block

        # Assertion disabled for performance
        # assert len(join_units) == len(mgrs_indexers)

        if len(join_units) == 1:
            # a single unit: reuse its values directly
            values = blk.values.copy() if copy else blk.values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            unit_values = [ju.block.values for ju in join_units]

            if blk.is_extension:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(unit_values, axis=1)
                values = ensure_block_shape(values, ndim=2)
            else:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(unit_values, axis=1)

            values = ensure_wrapped_if_datetimelike(values)
            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            new_blk = blk.make_block_same_class(values, placement=placement)
        else:
            new_blk = new_block_2d(values, placement=placement)
        new_blocks.append(new_blk)

    return BlockManager(tuple(new_blocks), axes)