def ndarray_to_mgr(
    values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str
) -> Manager:
    """
    Build a Manager from array-like input; used in ``DataFrame.__init__``.

    Parameters
    ----------
    values : ndarray, list, Series, Index or ExtensionArray
        The data. A Series contributes its name (as the single column label)
        and its index when ``columns`` / ``index`` are not given; when
        ``index`` is given the Series is reindexed to it.
    index : axis labels or None
    columns : axis labels or None
    dtype : DtypeObj or None
        Requested dtype. When it differs from the prepared values' dtype the
        values are cast through ``sanitize_array``.
    copy : bool
        Whether the data should be copied.
    typ : str
        ``"array"`` produces an ``ArrayManager``; any other value produces a
        manager via ``create_block_manager_from_blocks``.

    Returns
    -------
    Manager
        The result of ``arrays_to_mgr`` when a 1D-only extension dtype is
        involved, otherwise an ``ArrayManager`` or a block-based manager
        depending on ``typ``.
    """
    if isinstance(values, ABCSeries):
        if columns is None and values.name is not None:
            columns = Index([values.name])

        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    # For ArrayManager the copy happens when converting to 1D arrays below,
    # so the array preparation itself must not copy.
    copy_on_sanitize = copy if typ != "array" else False

    vdtype = getattr(values, "dtype", None)

    if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype):
        # GH#19157
        if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1:
            # GH#12513 an EA dtype passed with a 2D array: split it into
            # one column slice per EA.
            # The ignore silences: No overload variant of "__getitem__" of
            # "ExtensionArray" matches argument type "Tuple[slice, int]"
            values = [
                values[:, col]  # type: ignore[call-overload]
                for col in range(values.shape[1])
            ]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))
        else:
            columns = ensure_index(columns)

        return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)

    if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ, PeriodDtype
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)
    else:
        # by definition an array here;
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy_on_sanitize)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        original_shape = values.shape
        flattened = values.ravel()

        # GH#40110 see similar check inside sanitize_array
        float_to_int = is_integer_dtype(dtype) and values.dtype.kind == "f"

        values = sanitize_array(
            flattened,
            None,
            dtype=dtype,
            copy=copy_on_sanitize,
            raise_cast_failure=not float_to_int,
        )
        values = values.reshape(original_shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(
        values.shape[0], values.shape[1], index=index, columns=columns
    )
    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            # No dtype requested: attempt datetimelike inference per column.
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, col])
                )
                for col in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, col] for col in range(values.shape[1])]

        if copy:
            arrays = [column_values.copy() for column_values in arrays]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    values = values.T

    # If no dtype was specified, try to convert object data; this picks up
    # datetimelikes embedded in an object array.
    block_values = None
    if dtype is None and is_object_dtype(values.dtype):
        object_rows = list(values)
        inferred_rows = [maybe_infer_to_datetimelike(row) for row in object_rows]

        # don't convert (and copy) the objects if no type inference occurs
        if any(
            before is not after
            for before, after in zip(object_rows, inferred_rows)
        ):
            reshaped_rows = [ensure_block_shape(row, 2) for row in inferred_rows]
            block_values = [
                new_block_2d(row, placement=BlockPlacement(position))
                for position, row in enumerate(reshaped_rows)
            ]

    if block_values is None:
        # Nothing was inferred (or inference was not attempted): keep the
        # data as one block spanning every column.
        placement = BlockPlacement(slice(len(columns)))
        block_values = [new_block_2d(values, placement=placement)]

    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(
        block_values, [columns, index], verify_integrity=False
    )
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate managers into a single manager.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
        Axes of the resulting manager.
    concat_axis : int
        Axis along which to concatenate; ``0`` is delegated to
        ``_concat_managers_axis0``.
    copy : bool
        Whether values are copied; for a lone join unit a view is taken
        when this is False.

    Returns
    -------
    Manager
        A ``BlockManager`` built from the concatenated blocks, or whatever
        ``_concatenate_array_managers`` / ``_concat_managers_axis0`` return
        on their respective paths.
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    first_manager = mgrs_indexers[0][0]
    if isinstance(first_manager, ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    # The caller is responsible for ensuring that concat_axis does not appear
    # in any of the indexers; the assertion is disabled for performance.

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)
    # After reindexing every indexer is expected to be empty
    # (assertion disabled for performance).

    per_manager_plans = [
        _get_mgr_concatenation_plan(manager) for manager, _ in mgrs_indexers
    ]
    combined_plan = _combine_concat_plans(per_manager_plans)

    result_blocks = []
    for placement, join_units in combined_plan:
        # Each plan entry is expected to hold one join unit per manager
        # (assertion disabled for performance).
        lead_block = join_units[0].block

        if len(join_units) == 1:
            source_values = lead_block.values
            values = source_values.copy() if copy else source_values.view()
            same_class = True
        elif _is_uniform_join_units(join_units):
            pieces = [join_unit.block.values for join_unit in join_units]

            if lead_block.is_extension:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(pieces, axis=1)
                values = ensure_block_shape(values, ndim=2)
            else:
                # _is_uniform_join_units ensures a single dtype, so
                # np.concatenate can be used, which is more performant
                # than concat_compat
                values = np.concatenate(pieces, axis=1)

            values = ensure_wrapped_if_datetimelike(values)
            same_class = lead_block.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            same_class = False

        if same_class:
            new_block = lead_block.make_block_same_class(values, placement=placement)
        else:
            new_block = new_block_2d(values, placement=placement)
        result_blocks.append(new_block)

    return BlockManager(tuple(result_blocks), axes)