def find_and_replace(self, to_replace, replacement, all_nan):
    """
    Return a copy of this column with every value listed in
    *to_replace* swapped for the corresponding entry of *replacement*.

    Parameters
    ----------
    to_replace : column-like
        Values to look for in this column.
    replacement : column-like
        Values to substitute. When it yields a single value while more
        than one value is being replaced, that value is expanded to one
        entry per value being replaced.
    all_nan : bool
        When true, *replacement* is converted directly with
        ``column.as_column(..., dtype=self.dtype)`` instead of going
        through ``_normalize_find_and_replace_input``.

    Returns
    -------
    The column produced by ``libcudf.replace.replace``.
    """
    old_values = _normalize_find_and_replace_input(self.dtype, to_replace)
    new_values = (
        column.as_column(replacement, dtype=self.dtype)
        if all_nan
        else _normalize_find_and_replace_input(self.dtype, replacement)
    )

    n_old = len(old_values)
    if n_old > 1 and len(new_values) == 1:
        # One replacement for many targets: repeat it so both columns
        # line up element-for-element.
        repeated = utils.scalar_broadcast_to(
            replacement[0], (n_old,), self.dtype
        )
        new_values = column.as_column(repeated)

    # NOTE(review): numeric_normalize_types presumably brings the three
    # columns to a common dtype -- confirm against its definition.
    old_values, new_values, result = numeric_normalize_types(
        old_values, new_values, self.copy()
    )
    return libcudf.replace.replace(result, old_values, new_values)
def find_and_replace(
    self,
    to_replace: ColumnLike,
    replacement: ColumnLike,
    all_nan: bool = False,
) -> NumericalColumn:
    """
    Return a column in which every value listed in *to_replace* has
    been swapped for the corresponding entry of *replacement*.

    Parameters
    ----------
    to_replace : ColumnLike
        Values to look for in this column.
    replacement : ColumnLike
        Values to substitute. A single replacement is expanded to one
        entry per value being replaced.
    all_nan : bool, default False
        When true, *replacement* is converted directly with
        ``column.as_column(..., dtype=self.dtype)`` instead of going
        through ``_normalize_find_and_replace_input``.

    Returns
    -------
    NumericalColumn
        An unchanged copy of ``self`` when neither input converts to a
        ``NumericalColumn`` or when there is a single replacement but
        nothing to replace; otherwise the column produced by
        ``libcudf.replace.replace``.

    Raises
    ------
    TypeError
        If *to_replace* and *replacement* convert to different column
        classes.
    """
    # An input made up solely of nulls gets its dtype inferred as
    # string, yet it is equally valid for this column's dtype, so such
    # inputs are cast to self.dtype before the type comparison below.
    probe_old = column.as_column(to_replace)
    if probe_old.null_count == len(probe_old):
        probe_old = probe_old.astype(self.dtype)

    probe_new = column.as_column(replacement)
    if probe_new.null_count == len(probe_new):
        probe_new = probe_new.astype(self.dtype)

    if type(probe_old) != type(probe_new):
        raise TypeError(
            f"to_replace and value should be of same types,"
            f"got to_replace dtype: {probe_old.dtype} and "
            f"value dtype: {probe_new.dtype}"
        )

    if not (
        isinstance(probe_old, NumericalColumn)
        or isinstance(probe_new, NumericalColumn)
    ):
        # Neither side is numeric: nothing in this column can match.
        return self.copy()

    old_values = _normalize_find_and_replace_input(self.dtype, to_replace)
    if all_nan:
        new_values = column.as_column(replacement, dtype=self.dtype)
    else:
        new_values = _normalize_find_and_replace_input(
            self.dtype, replacement
        )

    if len(new_values) == 1:
        n_old = len(old_values)
        if n_old > 1:
            # One replacement for many targets: repeat it so both
            # columns line up element-for-element.
            repeated = utils.scalar_broadcast_to(
                replacement[0], (n_old,), self.dtype
            )
            new_values = column.as_column(repeated)
        elif n_old == 0:
            return self.copy()

    old_values, new_values, result = numeric_normalize_types(
        old_values, new_values, self
    )

    # When the same old value is listed more than once, only the last
    # replacement given for it is kept.
    pairs = cudf.DataFrame._from_data({"old": old_values, "new": new_values})
    pairs = pairs.drop_duplicates(
        subset=["old"], keep="last", ignore_index=True
    )

    old_unique = pairs._data["old"]
    if old_unique.null_count == 1:
        # A null among the old values is applied through fillna, and
        # its row is then removed from the pairs handed to libcudf.
        null_rows = old_unique.isnull()
        result = result.fillna(pairs._data["new"][null_rows][0])
        pairs = pairs.dropna(subset=["old"])

    return libcudf.replace.replace(
        result, pairs._data["old"], pairs._data["new"]
    )
def find_and_replace(self, to_replace, replacement, all_nan):
    """
    Return a copy of this column with every value listed in
    *to_replace* swapped for the corresponding entry of *replacement*.

    Parameters
    ----------
    to_replace : column-like
        Values to look for in this column.
    replacement : column-like
        Values to substitute.
    all_nan : bool
        When true, *replacement* is converted using this column's
        dtype; otherwise no dtype is passed (``dtype=None``).

    Returns
    -------
    The column produced by ``libcudf.replace.replace``.
    """
    old_values = column.as_column(to_replace)
    new_values = column.as_column(
        replacement, dtype=self.dtype if all_nan else None
    )

    # NOTE(review): numeric_normalize_types presumably brings the three
    # columns to a common dtype -- confirm against its definition.
    old_values, new_values, result = numeric_normalize_types(
        old_values, new_values, self.copy()
    )
    return libcudf.replace.replace(result, old_values, new_values)
def find_and_replace(
    self,
    to_replace: ColumnLike,
    replacement: ColumnLike,
    all_nan: bool = False,
) -> NumericalColumn:
    """
    Return a copy of this column with every value listed in
    *to_replace* swapped for the corresponding entry of *replacement*.

    Parameters
    ----------
    to_replace : ColumnLike
        Values to look for in this column.
    replacement : ColumnLike
        Values to substitute. A single replacement is expanded to one
        entry per value being replaced.
    all_nan : bool, default False
        When true, *replacement* is converted directly with
        ``column.as_column(..., dtype=self.dtype)`` instead of going
        through ``_normalize_find_and_replace_input``.

    Returns
    -------
    NumericalColumn
        An unchanged copy of ``self`` when neither input converts to a
        ``NumericalColumn`` or when there is a single replacement but
        nothing to replace; otherwise the column produced by
        ``libcudf.replace.replace``.

    Raises
    ------
    TypeError
        If *to_replace* and *replacement* convert to different column
        classes.
    """
    probe_old = as_column(to_replace)
    probe_new = as_column(replacement)

    if type(probe_old) != type(probe_new):
        raise TypeError(
            f"to_replace and value should be of same types,"
            f"got to_replace dtype: {probe_old.dtype} and "
            f"value dtype: {probe_new.dtype}"
        )

    if not (
        isinstance(probe_old, NumericalColumn)
        or isinstance(probe_new, NumericalColumn)
    ):
        # Neither side is numeric: nothing in this column can match.
        return self.copy()

    old_values = _normalize_find_and_replace_input(self.dtype, to_replace)
    if all_nan:
        new_values = column.as_column(replacement, dtype=self.dtype)
    else:
        new_values = _normalize_find_and_replace_input(
            self.dtype, replacement
        )

    result = self.copy()
    if len(new_values) == 1:
        n_old = len(old_values)
        if n_old == 0:
            # A replacement was given but there is nothing to replace.
            return result
        if n_old > 1:
            # One replacement for many targets: repeat it so both
            # columns line up element-for-element.
            repeated = utils.scalar_broadcast_to(
                replacement[0], (n_old,), self.dtype
            )
            new_values = column.as_column(repeated)

    old_values, new_values, result = numeric_normalize_types(
        old_values, new_values, result
    )
    return libcudf.replace.replace(result, old_values, new_values)
def find_and_replace(
    self,
    to_replace: ColumnLike,
    replacement: ColumnLike,
    all_nan: bool = False,
) -> NumericalColumn:
    """
    Return a copy of this column with every value listed in
    *to_replace* swapped for the corresponding entry of *replacement*.

    Parameters
    ----------
    to_replace : ColumnLike
        Values to look for in this column.
    replacement : ColumnLike
        Values to substitute. A single replacement is expanded to one
        entry per value being replaced.
    all_nan : bool, default False
        When true, *replacement* is converted directly with
        ``column.as_column(..., dtype=self.dtype)`` instead of going
        through ``_normalize_find_and_replace_input``.

    Returns
    -------
    NumericalColumn
        An unchanged copy of ``self`` when neither input converts to a
        ``NumericalColumn`` or when there is a single replacement but
        nothing to replace; otherwise the column produced by
        ``libcudf.replace.replace``.

    Raises
    ------
    TypeError
        If *to_replace* and *replacement* convert to different column
        classes.
    """
    probe_old = column.as_column(to_replace)
    probe_new = column.as_column(replacement)

    if type(probe_old) != type(probe_new):
        raise TypeError(
            f"to_replace and value should be of same types,"
            f"got to_replace dtype: {probe_old.dtype} and "
            f"value dtype: {probe_new.dtype}"
        )

    if not (
        isinstance(probe_old, NumericalColumn)
        or isinstance(probe_new, NumericalColumn)
    ):
        # Neither side is numeric: nothing in this column can match.
        return self.copy()

    old_values = _normalize_find_and_replace_input(self.dtype, to_replace)
    if all_nan:
        new_values = column.as_column(replacement, dtype=self.dtype)
    else:
        new_values = _normalize_find_and_replace_input(
            self.dtype, replacement
        )

    result = self.copy()
    if len(new_values) == 1:
        n_old = len(old_values)
        if n_old == 0:
            # A replacement was given but there is nothing to replace.
            return result
        if n_old > 1:
            # One replacement for many targets: repeat it so both
            # columns line up element-for-element.
            repeated = utils.scalar_broadcast_to(
                replacement[0], (n_old,), self.dtype
            )
            new_values = column.as_column(repeated)

    old_values, new_values, result = numeric_normalize_types(
        old_values, new_values, result
    )

    # When the same old value is listed more than once, only the last
    # replacement given for it is kept.
    pairs = cudf.DataFrame({"old": old_values, "new": new_values})
    pairs = pairs.drop_duplicates(
        subset=["old"], keep="last", ignore_index=True
    )

    old_unique = pairs._data["old"]
    if old_unique.null_count == 1:
        # A null among the old values is applied through fillna, and
        # its row is then removed from the pairs handed to libcudf.
        null_rows = old_unique.isna()
        result = result.fillna(pairs._data["new"][null_rows][0])
        pairs = pairs.dropna(subset=["old"])

    return libcudf.replace.replace(
        result, pairs["old"]._column, pairs["new"]._column
    )
def append(self, other):
    """
    Concatenate this Index with one or more other Index objects.

    Parameters
    ----------
    other : Index or list/tuple of indices
        The Index (or collection of Index objects) to place after
        this one.

    Returns
    -------
    appended : Index

    Raises
    ------
    TypeError
        If an object-dtype Index is combined with a non-object-dtype
        Index, or if any of the inputs is not an Index.

    Examples
    --------
    >>> import cudf
    >>> idx = cudf.Index([1, 2, 10, 100])
    >>> idx
    Int64Index([1, 2, 10, 100], dtype='int64')
    >>> other = cudf.Index([200, 400, 50])
    >>> other
    Int64Index([200, 400, 50], dtype='int64')
    >>> idx.append(other)
    Int64Index([1, 2, 10, 100, 200, 400, 50], dtype='int64')

    A list of Index objects is accepted as well

    >>> idx.append([other, other])
    Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64')
    """
    if is_list_like(other):
        to_concat = [self, *other]
    else:
        left = self

        if len(other) == 0:
            # Nothing to add: this Index alone makes up the result.
            to_concat = [self]

        other = cudf.Index(other)

        if len(self) == 0:
            # This Index contributes nothing: use ``other`` alone.
            to_concat = [other]

        if len(self) and len(other):
            if is_mixed_with_object_dtype(left, other):
                # Report whichever side is *not* object dtype.
                if left.dtype == cudf.dtype("object"):
                    got_dtype = other.dtype
                else:
                    got_dtype = left.dtype
                raise TypeError(
                    f"cudf does not support appending an Index of "
                    f"dtype `{cudf.dtype('object')}` with an Index "
                    f"of dtype `{got_dtype}`, please type-cast "
                    f"either one of them to same dtypes."
                )

            if (
                isinstance(self._values, cudf.core.column.NumericalColumn)
                and self.dtype != other.dtype
            ):
                # NOTE(review): numeric_normalize_types presumably
                # casts both sides to a common dtype -- confirm.
                left, other = numeric_normalize_types(self, other)

            to_concat = [left, other]

    if not all(isinstance(piece, BaseIndex) for piece in to_concat):
        raise TypeError("all inputs must be Index")

    return self._concat(to_concat)