def dict_to_mgr(data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str) -> Manager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. Used in DataFrame.__init__ """ arrays: Union[Sequence[Any], Series] if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isna() if index is None: # GH10856 # raise ValueError if only scalars in dict index = extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): nan_dtype: DtypeObj if dtype is None or (isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.flexible)): # GH#1783 nan_dtype = np.dtype("object") else: nan_dtype = dtype val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: keys = list(data.keys()) columns = data_names = Index(keys) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies arrays = [ arr if not isinstance(arr, ABCIndex) else arr._data for arr in arrays ] arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ)
def init_dict(data, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isna() if index is None: # GH10856 # raise ValueError if only scalars in dict index = extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): if dtype is None or np.issubdtype(dtype, np.flexible): # GH#1783 nan_dtype = object else: nan_dtype = dtype val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) arrays = (com.maybe_iterable_to_list(data[k]) for k in keys) # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies arrays = [ arr if not isinstance(arr, ABCIndexClass) else arr._data for arr in arrays ] arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
def dict_to_mgr( data: dict, index, columns, *, dtype: DtypeObj | None = None, typ: str = "block", copy: bool = True, ) -> Manager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. Used in DataFrame.__init__ """ arrays: Sequence[Any] | Series if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) missing = arrays.isna() if index is None: # GH10856 # raise ValueError if only scalars in dict index = _extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): nan_dtype: DtypeObj if dtype is not None: # calling sanitize_array ensures we don't mix-and-match # NA dtypes midxs = missing.values.nonzero()[0] for i in midxs: arr = sanitize_array(arrays.iat[i], index, dtype=dtype) arrays.iat[i] = arr else: # GH#1783 nan_dtype = np.dtype("object") val = construct_1d_arraylike_from_scalar( np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() arrays = list(arrays) columns = ensure_index(columns) else: keys = list(data.keys()) columns = Index(keys) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] arrays = [ arr if not isinstance(arr, Index) else arr._data for arr in arrays ] if copy: if typ == "block": # We only need to copy arrays that will not get consolidated, i.e. # only EA arrays arrays = [ x.copy() if isinstance(x, ExtensionArray) else x for x in arrays ] else: # dtype check to exclude e.g. range objects, scalars arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
def dict_to_mgr(data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str) -> Manager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. Used in DataFrame.__init__ """ arrays: Union[Sequence[Any], Series] if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isna() if index is None: # GH10856 # raise ValueError if only scalars in dict index = extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): if dtype is None or ( not is_extension_array_dtype(dtype) # error: Argument 1 to "issubdtype" has incompatible type # "Union[dtype, ExtensionDtype]"; expected "Union[dtype, None, # type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any, # Union[int, Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, # Any]]" and np.issubdtype(dtype, np.flexible) # type: ignore[arg-type] ): # GH#1783 # error: Value of type variable "_DTypeScalar" of "dtype" cannot be # "object" nan_dtype = np.dtype(object) # type: ignore[type-var] else: # error: Incompatible types in assignment (expression has type # "Union[dtype, ExtensionDtype]", variable has type "dtype") nan_dtype = dtype # type: ignore[assignment] val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: keys = list(data.keys()) columns = data_names = Index(keys) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies arrays = [ arr if not isinstance(arr, ABCIndex) else arr._data for arr in arrays ] arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ)
def dict_to_mgr( data: dict, index, columns, *, dtype: DtypeObj | None = None, typ: str = "block", copy: bool = True, ) -> Manager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. Used in DataFrame.__init__ """ arrays: Sequence[Any] | Series if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isna() if index is None: # GH10856 # raise ValueError if only scalars in dict index = _extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): nan_dtype: DtypeObj if dtype is None or (isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.flexible)): # GH#1783 nan_dtype = np.dtype("object") else: nan_dtype = dtype val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() arrays = list(arrays) else: keys = list(data.keys()) columns = data_names = Index(keys) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies arrays = [ arr if not isinstance(arr, ABCIndex) else arr._data for arr in arrays ] arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] if copy: # arrays_to_mgr (via form_blocks) won't make copies for EAs # dtype attr check to exclude EADtype-castable strs arrays = [ x if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype) else x.copy() for x in arrays ] # TODO: can we get rid of the dt64tz special case above? return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy)