def _match_join_keys(
    lcol: ColumnBase, rcol: ColumnBase, how: str
) -> Tuple[ColumnBase, ColumnBase]:
    """Bring a pair of join-key columns to a dtype they can be joined on.

    Parameters
    ----------
    lcol : ColumnBase
        Key column from the left side of the join.
    rcol : ColumnBase
        Key column from the right side of the join.
    how : str
        Join type. ``"left"``, ``"leftsemi"`` and ``"leftanti"`` keep a
        categorical left dtype; ``"left"`` additionally tries to keep a
        non-categorical left dtype when the right column can be cast to it.

    Returns
    -------
    Tuple[ColumnBase, ColumnBase]
        The (possibly cast) left and right key columns. When no cast is
        needed the inputs are returned unchanged.

    Raises
    ------
    TypeError
        If the dtypes differ and either one is a decimal dtype.
    """
    ltype = lcol.dtype
    rtype = rcol.dtype

    lhs_is_cat = isinstance(ltype, CategoricalDtype)
    rhs_is_cat = isinstance(rtype, CategoricalDtype)

    # Both keys categorical: a dedicated helper reconciles them.
    if lhs_is_cat and rhs_is_cat:
        return _match_categorical_dtypes_both(
            cast(CategoricalColumn, lcol), cast(CategoricalColumn, rcol), how
        )

    # Only the left key is categorical.
    if lhs_is_cat:
        if how in {"left", "leftsemi", "leftanti"}:
            # Left-preserving joins keep the left categorical dtype.
            return lcol, rcol.astype(ltype)
        common_type = ltype.categories.dtype
        return lcol.astype(common_type), rcol.astype(common_type)

    # Only the right key is categorical: join on its categories' dtype.
    if rhs_is_cat:
        common_type = rtype.categories.dtype
        return lcol.astype(common_type), rcol.astype(common_type)

    # Identical dtypes need no casting.
    if is_dtype_equal(ltype, rtype):
        return lcol, rcol

    if is_decimal_dtype(ltype) or is_decimal_dtype(rtype):
        raise TypeError(
            "Decimal columns can only be merged with decimal columns "
            "of the same precision and scale"
        )

    both_numeric = np.issubdtype(ltype, np.number) and np.issubdtype(
        rtype, np.number
    )
    if both_numeric:
        if ltype.kind == rtype.kind:
            # Same kind: take whichever dtype `max` ranks higher.
            common_type = max(ltype, rtype)
        else:
            common_type = np.find_common_type([], (ltype, rtype))
    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
        rtype, np.datetime64
    ):
        common_type = max(ltype, rtype)
    else:
        # No promotion rule matched; the final casts receive None.
        common_type = None

    if how == "left":
        # Prefer the left dtype when the right column (nulls filled with 0)
        # can be cast to it safely.
        if rcol.fillna(0).can_cast_safely(ltype):
            return lcol, rcol.astype(ltype)
        warnings.warn(
            f"Can't safely cast column from {rtype} to {ltype}, "
            f"upcasting to {common_type}."
        )

    return lcol.astype(common_type), rcol.astype(common_type)
def fillna(self, fill_value=None, method=None):
    """Fill null values in this column.

    Parameters
    ----------
    fill_value : scalar or array-like, optional
        Replacement for nulls. A value for which ``cudf.utils.utils.isnat``
        is true is delegated to ``_fillna_natwise``. Other scalars are
        wrapped in a ``cudf.Scalar``; non-scalars are converted with
        ``column.as_column(..., nan_as_null=False)``.
    method : optional
        Forwarded to the parent class's ``fillna`` and used only when
        ``fill_value`` is None.

    Returns
    -------
    The result of ``_fillna_natwise``, ``ColumnBase.fillna`` or the parent
    class's ``fillna``, depending on the branch taken.
    """
    if fill_value is None:
        return super().fillna(method=method)

    if cudf.utils.utils.isnat(fill_value):
        return _fillna_natwise(self)

    col = self
    if is_scalar(fill_value):
        # `dtype` must be bound before the Scalar construction below.
        # Previously it was assigned only inside the timedelta64 branch, so
        # any other plain scalar (e.g. the integer 0) raised
        # UnboundLocalError.
        # NOTE(review): None presumably lets cudf.Scalar infer the dtype
        # from the value -- confirm this is the desired coercion.
        dtype = None
        if isinstance(fill_value, np.timedelta64):
            # Cast both the column and the fill value to the dtype chosen
            # by determine_out_dtype for this pair.
            dtype = determine_out_dtype(self.dtype, fill_value.dtype)
            fill_value = fill_value.astype(dtype)
            col = col.astype(dtype)
        if not isinstance(fill_value, cudf.Scalar):
            fill_value = cudf.Scalar(fill_value, dtype=dtype)
    else:
        fill_value = column.as_column(fill_value, nan_as_null=False)

    return ColumnBase.fillna(col, fill_value)
def _match_join_keys(
    lcol: ColumnBase, rcol: ColumnBase, how: str
) -> Tuple[ColumnBase, ColumnBase]:
    """Cast ``lcol`` and ``rcol`` to a common dtype for use as join keys.

    Parameters
    ----------
    lcol : ColumnBase
        Key column from the left side of the join.
    rcol : ColumnBase
        Key column from the right side of the join.
    how : str
        Join type. For ``"left"`` the left dtype is kept when the right
        column can be safely cast to it.

    Returns
    -------
    Tuple[ColumnBase, ColumnBase]
        The (possibly cast) left and right key columns. If no casting is
        necessary, the inputs are returned as is.

    Raises
    ------
    TypeError
        If the dtypes differ and either one is a ``cudf.Decimal64Dtype``.
    """
    common_type = None

    ltype = lcol.dtype
    rtype = rcol.dtype

    # If either side is categorical, a dedicated helper does the matching.
    if isinstance(ltype, CategoricalDtype) or isinstance(
        rtype, CategoricalDtype
    ):
        return _match_categorical_dtypes(lcol, rcol, how)

    if pd.api.types.is_dtype_equal(ltype, rtype):
        return lcol, rcol

    if isinstance(ltype, cudf.Decimal64Dtype) or isinstance(
        rtype, cudf.Decimal64Dtype
    ):
        raise TypeError(
            "Decimal columns can only be merged with decimal columns "
            "of the same precision and scale"
        )

    if (np.issubdtype(ltype, np.number)) and (
        np.issubdtype(rtype, np.number)
    ):
        # Same kind: take whichever dtype `max` ranks higher; otherwise ask
        # NumPy for a dtype both can be promoted to.
        common_type = (
            max(ltype, rtype)
            if ltype.kind == rtype.kind
            else np.find_common_type([], (ltype, rtype))
        )
    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
        rtype, np.datetime64
    ):
        common_type = max(ltype, rtype)

    if how == "left":
        # Prefer the left dtype when the right column (nulls filled with 0)
        # can be cast to it safely.
        if rcol.fillna(0).can_cast_safely(ltype):
            return lcol, rcol.astype(ltype)
        else:
            # Both fragments must be f-strings; without the prefix on the
            # second one the message showed the literal "{common_type}".
            warnings.warn(
                f"Can't safely cast column from {rtype} to {ltype}, "
                f"upcasting to {common_type}."
            )

    return lcol.astype(common_type), rcol.astype(common_type)