def _restore_categorical_keys(
    self, lhs: Frame, rhs: Frame
) -> Tuple[Frame, Frame]:
    """Cast inner-join key columns back to categorical.

    For inner joins, any categorical keys in ``self.lhs`` and
    ``self.rhs`` were cast to their category type to produce ``lhs``
    and ``rhs``. This undoes that cast on shallow copies of the inputs;
    for any other join type the shallow copies are returned untouched.

    Parameters
    ----------
    lhs, rhs : Frame
        The left and right frames whose key columns may need to be
        converted back to ``"category"``.

    Returns
    -------
    Tuple[Frame, Frame]
        Shallow copies of ``lhs`` and ``rhs``, with the affected key
        columns replaced.
    """
    restored_left = lhs.copy(deep=False)
    restored_right = rhs.copy(deep=False)

    if self.how != "inner":
        return restored_left, restored_right

    for lkey, rkey in zip(*self._keys):
        # Only key pairs that were categorical on *both* original
        # operands are restored; the right side is inspected only when
        # the left side already qualifies.
        if not isinstance(
            lkey.get(self.lhs).dtype, cudf.CategoricalDtype
        ):
            continue
        if not isinstance(
            rkey.get(self.rhs).dtype, cudf.CategoricalDtype
        ):
            continue

        left_as_category = lkey.get(restored_left).astype("category")
        lkey.set(restored_left, left_as_category, validate=False)

        right_as_category = rkey.get(restored_right).astype("category")
        rkey.set(restored_right, right_as_category, validate=False)

    return restored_left, restored_right
def _match_key_dtypes(self, lhs: Frame, rhs: Frame) -> Tuple[Frame, Frame]:
    """Match the dtypes of the key columns from ``lhs`` and ``rhs``.

    Each pair of key columns is passed through ``_match_join_keys``
    (with ``how=self.how``). A column is written into the output frame
    only when the helper returned a different object than it was given.

    Parameters
    ----------
    lhs, rhs : Frame
        The left and right frames holding the join key columns.

    Returns
    -------
    Tuple[Frame, Frame]
        Shallow copies of ``lhs`` and ``rhs`` with the re-cast key
        columns substituted in.
    """
    matched_left = lhs.copy(deep=False)
    matched_right = rhs.copy(deep=False)

    for lkey, rkey in zip(*self._keys):
        left_col = lkey.get(lhs)
        right_col = rkey.get(rhs)
        cast_left, cast_right = _match_join_keys(
            left_col, right_col, how=self.how
        )

        # Identity (not equality) decides whether a column was replaced.
        if cast_left is not left_col:
            lkey.set(matched_left, cast_left, validate=False)
        if cast_right is not right_col:
            rkey.set(matched_right, cast_right, validate=False)

    return matched_left, matched_right
def _normalize_columns_and_scalars_type(
    frame: Frame,
    other: Any,
    inplace: bool = False,
) -> Tuple[Union[Frame, ColumnLike], Any]:
    """
    Try to normalize the other's dtypes as per frame.

    Parameters
    ----------
    frame : Can be a DataFrame or Series or Index
    other : Can be a DataFrame, Series, Index, Array like object or a
        scalar value
        if frame is DataFrame, other can be only a scalar or array like
        with size of number of columns in DataFrame or a DataFrame with
        same dimension
        if frame is Series, other can be only a scalar or a series like
        with same length as frame
    inplace : bool, default False
        Forwarded unchanged to ``_check_and_cast_columns_with_other``.

    Returns
    -------
    tuple
        A ``(source, other)`` pair.

        * DataFrame ``frame`` and DataFrame ``other``: shallow copies of
          both, with every column replaced by the values returned from
          ``_check_and_cast_columns_with_other``. Columns of ``other``
          are looked up by the column names of ``frame``.
        * DataFrame ``frame`` and any other ``other``: a shallow copy of
          ``frame`` with its columns replaced, and a list holding one
          normalized ``other`` value per column.
        * Series or Index ``frame``: whatever
          ``_check_and_cast_columns_with_other`` returns (presumably a
          ``(source_col, other)`` pair, since the DataFrame branches
          unpack it that way).

    Raises
    ------
    ValueError
        If ``frame`` is neither a DataFrame, a Series nor an Index.
    """
    if isinstance(frame, DataFrame):
        source_df = frame.copy(deep=False)

        if isinstance(other, DataFrame):
            other_df = other.copy(deep=False)
            for col_name in source_df._column_names:
                source_col, other_col = _check_and_cast_columns_with_other(
                    source_col=source_df._data[col_name],
                    other=other_df._data[col_name],
                    inplace=inplace,
                )
                source_df._data[col_name] = source_col
                other_df._data[col_name] = other_col
            return source_df, other_df

        # Scalar, or list/array-like with one entry per column. The
        # check does not depend on the column, so evaluate it once.
        other_is_scalar = cudf.api.types.is_scalar(other)
        others = []
        for i, col_name in enumerate(frame._column_names):
            source_col, other_scalar = _check_and_cast_columns_with_other(
                source_col=source_df._data[col_name],
                other=other if other_is_scalar else other[i],
                inplace=inplace,
            )
            source_df._data[col_name] = source_col
            others.append(other_scalar)
        return source_df, others

    if isinstance(frame, (Series, Index)):
        # Non-scalar operands are converted to a column first; scalars
        # are handed to the helper unchanged.
        if not cudf.api.types.is_scalar(other):
            other = cudf.core.column.as_column(other)
        return _check_and_cast_columns_with_other(
            source_col=frame._data[frame.name],
            other=other,
            inplace=inplace,
        )

    raise ValueError(
        f"Inappropriate input {type(frame)} "
        f"and other {type(other)} combination"
    )