def reorder_arrays(arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int) -> tuple[list[ArrayLike], Index]: """ Pre-emptively (cheaply) reindex arrays with new columns. """ # reorder according to the columns if columns is not None: if not columns.equals(arr_columns): # if they are equal, there is nothing to do new_arrays: list[ArrayLike | None] new_arrays = [None] * len(columns) indexer = arr_columns.get_indexer(columns) for i, k in enumerate(indexer): if k == -1: # by convention default is all-NaN object dtype arr = np.empty(length, dtype=object) arr.fill(np.nan) else: arr = arrays[k] new_arrays[i] = arr # Incompatible types in assignment (expression has type # "List[Union[ExtensionArray, ndarray[Any, Any], None]]", variable # has type "List[Union[ExtensionArray, ndarray[Any, Any]]]") arrays = new_arrays # type: ignore[assignment] arr_columns = columns return arrays, arr_columns
def _cast_types(self, values, cast_type, column):
    """
    Cast values to specified type

    Dispatches on ``cast_type``: categorical dtypes are built from the
    unique non-null values, extension dtypes are parsed by the extension
    array's own ``_from_sequence_of_strings``, and everything else goes
    through ``astype_nansafe``.

    Parameters
    ----------
    values : ndarray
    cast_type : string or np.dtype
        dtype to cast values to
    column : string
        column name - used only for error reporting

    Returns
    -------
    converted : ndarray
        For categorical and extension dtypes this is whatever the respective
        constructor returns rather than a plain ndarray.

    Raises
    ------
    NotImplementedError
        If the extension array type does not implement
        ``_from_sequence_of_strings``.
    ValueError
        If the fallback ``astype_nansafe`` conversion fails.
    """
    if is_categorical_dtype(cast_type):
        has_known_categories = (
            isinstance(cast_type, CategoricalDtype)
            and cast_type.categories is not None
        )

        # TODO: this is for consistency with c-parser which parses all
        # categories as strings
        if not (is_object_dtype(values) or has_known_categories):
            values = astype_nansafe(values, np.dtype(str))

        categories = Index(values).unique().dropna()
        codes = categories.get_indexer(values)
        return Categorical._from_inferred_categories(
            categories, codes, cast_type, true_values=self.true_values
        )

    if is_extension_array_dtype(cast_type):
        # use the EA's implementation of casting; first make sure cast_type
        # is an actual dtype and not a string
        cast_type = pandas_dtype(cast_type)
        array_type = cast_type.construct_array_type()
        try:
            # Only boolean dtypes receive the parser's true/false markers.
            if is_bool_dtype(cast_type):
                bool_markers = {
                    "true_values": self.true_values,
                    "false_values": self.false_values,
                }
            else:
                bool_markers = {}
            return array_type._from_sequence_of_strings(
                values, dtype=cast_type, **bool_markers
            )
        except NotImplementedError as err:
            raise NotImplementedError(
                f"Extension Array: {array_type} must implement "
                "_from_sequence_of_strings in order to be used in parser methods"
            ) from err

    try:
        return astype_nansafe(values, cast_type, copy=True, skipna=True)
    except ValueError as err:
        raise ValueError(
            f"Unable to convert column {column} to type {cast_type}"
        ) from err