예제 #1
0
def reorder_arrays(arrays: list[ArrayLike], arr_columns: Index,
                   columns: Index | None,
                   length: int) -> tuple[list[ArrayLike], Index]:
    """
    Pre-emptively (cheaply) reindex arrays with new columns.
    """
    # reorder according to the columns
    if columns is not None:
        if not columns.equals(arr_columns):
            # if they are equal, there is nothing to do
            new_arrays: list[ArrayLike | None]
            new_arrays = [None] * len(columns)
            indexer = arr_columns.get_indexer(columns)
            for i, k in enumerate(indexer):
                if k == -1:
                    # by convention default is all-NaN object dtype
                    arr = np.empty(length, dtype=object)
                    arr.fill(np.nan)
                else:
                    arr = arrays[k]
                new_arrays[i] = arr

            # Incompatible types in assignment (expression has type
            # "List[Union[ExtensionArray, ndarray[Any, Any], None]]", variable
            # has type "List[Union[ExtensionArray, ndarray[Any, Any]]]")
            arrays = new_arrays  # type: ignore[assignment]
            arr_columns = columns

    return arrays, arr_columns
예제 #2
0
    def _cast_types(self, values, cast_type, column):
        """
        Convert ``values`` to ``cast_type``.

        Parameters
        ----------
        values : ndarray
        cast_type : string or np.dtype
            dtype to cast values to
        column : string
            column name - used only for error reporting

        Returns
        -------
        converted : ndarray
            For categorical and extension dtypes the result is whatever
            the corresponding constructor returns.

        Raises
        ------
        NotImplementedError
            If the extension array type for ``cast_type`` raises
            NotImplementedError while parsing from strings.
        ValueError
            If the plain dtype conversion fails; the message names ``column``.
        """
        # --- categorical target -------------------------------------------
        if is_categorical_dtype(cast_type):
            has_known_categories = (
                isinstance(cast_type, CategoricalDtype)
                and cast_type.categories is not None
            )

            if not (is_object_dtype(values) or has_known_categories):
                # TODO: this is for consistency with the c-parser,
                # which parses all categories as strings
                values = astype_nansafe(values, np.dtype(str))

            categories = Index(values).unique().dropna()
            return Categorical._from_inferred_categories(
                categories,
                categories.get_indexer(values),
                cast_type,
                true_values=self.true_values,
            )

        # --- extension-array target: defer to the EA's own casting --------
        if is_extension_array_dtype(cast_type):
            # normalise first so a string alias becomes an actual dtype
            cast_type = pandas_dtype(cast_type)
            array_type = cast_type.construct_array_type()
            parse_kwargs = {"dtype": cast_type}
            try:
                if is_bool_dtype(cast_type):
                    # boolean arrays also receive the true/false value lists
                    parse_kwargs["true_values"] = self.true_values
                    parse_kwargs["false_values"] = self.false_values
                return array_type._from_sequence_of_strings(
                    values, **parse_kwargs)
            except NotImplementedError as err:
                raise NotImplementedError(
                    f"Extension Array: {array_type} must implement "
                    "_from_sequence_of_strings in order to be used in parser methods"
                ) from err

        # --- any other dtype ----------------------------------------------
        try:
            return astype_nansafe(values, cast_type, copy=True, skipna=True)
        except ValueError as err:
            raise ValueError(
                f"Unable to convert column {column} to type {cast_type}"
            ) from err