コード例 #1
0
ファイル: join.py プロジェクト: TravisHester/cudf
 def _restore_categorical_keys(
     self, lhs: Frame, rhs: Frame
 ) -> Tuple[Frame, Frame]:
     """Cast join keys back to categorical after an inner join.

     For inner joins, categorical key columns of ``self.lhs`` and
     ``self.rhs`` were cast to their category type to produce ``lhs``
     and ``rhs``; this method casts those keys back to ``"category"``.
     For every other join type the inputs are only shallow-copied.

     Parameters
     ----------
     lhs, rhs : Frame
         The left and right frames whose key columns may need to be
         cast back.

     Returns
     -------
     Tuple[Frame, Frame]
         Shallow copies of ``lhs`` and ``rhs``, with key columns
         replaced where both original keys were categorical.
     """
     restored_left = lhs.copy(deep=False)
     restored_right = rhs.copy(deep=False)

     # Only inner joins need any keys restored.
     if self.how != "inner":
         return restored_left, restored_right

     for lkey, rkey in zip(*self._keys):
         # The dtype test is made against the *original* operands
         # (self.lhs / self.rhs), not the frames passed in.
         if not isinstance(lkey.get(self.lhs).dtype, cudf.CategoricalDtype):
             continue
         if not isinstance(rkey.get(self.rhs).dtype, cudf.CategoricalDtype):
             continue

         left_as_category = lkey.get(restored_left).astype("category")
         lkey.set(restored_left, left_as_category, validate=False)

         right_as_category = rkey.get(restored_right).astype("category")
         rkey.set(restored_right, right_as_category, validate=False)

     return restored_left, restored_right
コード例 #2
0
ファイル: join.py プロジェクト: TravisHester/cudf
 def _match_key_dtypes(self, lhs: Frame, rhs: Frame) -> Tuple[Frame, Frame]:
     """Match the dtypes of the key columns from ``lhs`` and ``rhs``.

     Each pair of key columns is passed to ``_match_join_keys`` together
     with ``self.how``. A column is written to the output only when the
     helper returned a different object than the one it was given.

     Parameters
     ----------
     lhs, rhs : Frame
         The left and right frames holding the key columns.

     Returns
     -------
     Tuple[Frame, Frame]
         Shallow copies of ``lhs`` and ``rhs`` with any re-cast key
         columns replaced.
     """
     matched_left = lhs.copy(deep=False)
     matched_right = rhs.copy(deep=False)

     for lkey, rkey in zip(*self._keys):
         left_col = lkey.get(lhs)
         right_col = rkey.get(rhs)
         cast_left, cast_right = _match_join_keys(
             left_col, right_col, how=self.how
         )

         # Identity (not equality) decides whether a cast happened.
         replacements = (
             (lkey, matched_left, left_col, cast_left),
             (rkey, matched_right, right_col, cast_right),
         )
         for key, target, before, after in replacements:
             if after is not before:
                 key.set(target, after, validate=False)

     return matched_left, matched_right
コード例 #3
0
def _normalize_columns_and_scalars_type(
    frame: Frame,
    other: Any,
    inplace: bool = False,
) -> Tuple[Union[Frame, ColumnLike], Any]:
    """
    Try to normalize the dtypes of ``other`` as per ``frame``.

    Parameters
    ----------
    frame : DataFrame, Series or Index
        The object whose dtypes ``other`` is normalized against.
    other : DataFrame, Series, Index, array-like or scalar
        If ``frame`` is a DataFrame, ``other`` can only be a scalar,
        an array-like with one entry per column of ``frame``, or a
        DataFrame with the same dimension.

        If ``frame`` is a Series (or Index), ``other`` can only be a
        scalar or a series-like with the same length as ``frame``.
    inplace : bool, default False
        Forwarded unchanged to ``_check_and_cast_columns_with_other``.

    Returns
    -------
    tuple
        A ``(source, other)`` pair:

        * DataFrame with DataFrame: shallow copies of both frames whose
          columns were replaced by the helper's results.
        * Series/Index: whatever ``_check_and_cast_columns_with_other``
          returns for the single column of ``frame``.
        * DataFrame with scalar/array-like: a shallow copy of ``frame``
          and a list with one normalized ``other`` entry per column.

    Raises
    ------
    ValueError
        If ``frame`` is not a DataFrame, Series or Index.
    """
    if isinstance(frame, DataFrame) and isinstance(other, DataFrame):
        source_df = frame.copy(deep=False)
        other_df = other.copy(deep=False)
        # Columns are paired by the names found in ``frame``; ``other``
        # is indexed with those same names.
        for self_col in source_df._column_names:
            source_col, other_col = _check_and_cast_columns_with_other(
                source_col=source_df._data[self_col],
                other=other_df._data[self_col],
                inplace=inplace,
            )
            source_df._data[self_col] = source_col
            other_df._data[self_col] = other_col
        return source_df, other_df

    if isinstance(frame, (Series, Index)):
        # Non-scalar operands are converted to a column first; scalars
        # are handed to the helper as they are.
        if not cudf.api.types.is_scalar(other):
            other = cudf.core.column.as_column(other)
        return _check_and_cast_columns_with_other(
            source_col=frame._data[frame.name],
            other=other,
            inplace=inplace,
        )

    if isinstance(frame, DataFrame):
        # Handles a scalar, or a list/array-like of per-column scalars.
        source_df = frame.copy(deep=False)
        # Loop-invariant: decide once whether ``other`` is broadcast to
        # every column or indexed positionally.
        other_is_scalar = cudf.api.types.is_scalar(other)
        others = []
        for i, col_name in enumerate(frame._column_names):
            source_col, other_scalar = _check_and_cast_columns_with_other(
                source_col=source_df._data[col_name],
                other=other if other_is_scalar else other[i],
                inplace=inplace,
            )
            source_df._data[col_name] = source_col
            others.append(other_scalar)
        return source_df, others

    raise ValueError(f"Inappropriate input {type(frame)} "
                     f"and other {type(other)} combination")