class Series(_Frame, dd.core.Series):
    """Series collection that declares ``cudf.Series`` as its partition type."""

    _partition_type = cudf.Series

    def count(self, split_every=False):
        """Build a reduction using ``M.count`` per chunk and ``np.sum`` to combine.

        ``split_every`` is forwarded unchanged to ``reduction``; the result
        meta is declared as ``"i8"``.
        """
        return reduction(
            self,
            split_every=split_every,
            chunk=M.count,
            aggregate=np.sum,
            meta="i8",
        )

    def mean(self, split_every=False):
        """Return ``self.sum(...) / self.count(...)``.

        Both operands are built with the same ``split_every`` value.
        """
        # Local names deliberately avoid shadowing the ``sum`` builtin.
        total = self.sum(split_every=split_every)
        n_obs = self.count(split_every=split_every)
        return total / n_obs

    def unique_k(self, k, split_every=None):
        """Build a reduction using ``M.unique_k`` per chunk and ``unique_k_agg``.

        ``k`` is passed through to ``reduction`` as a keyword argument, and
        the result meta is this collection's own ``_meta``.
        """
        return reduction(
            self,
            k=k,
            split_every=split_every,
            chunk=M.unique_k,
            aggregate=unique_k_agg,
            token="unique-k",
            meta=self._meta,
        )

    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    dt = CachedAccessor("dt", DatetimeAccessor)
    cat = CachedAccessor("cat", CategoricalAccessor)
class Series(_Frame, dd.core.Series):
    """Series collection that declares ``cudf.Series`` as its partition type."""

    _partition_type = cudf.Series

    def count(self, split_every=False):
        """Build a reduction using ``M.count`` per chunk and ``np.sum`` to combine.

        ``split_every`` is forwarded unchanged to ``reduction``; the result
        meta is declared as ``"i8"``.
        """
        return reduction(
            self,
            split_every=split_every,
            chunk=M.count,
            aggregate=np.sum,
            meta="i8",
        )

    def mean(self, split_every=False):
        """Return ``self.sum(...) / self.count(...)``.

        Both operands are built with the same ``split_every`` value.
        """
        # Local names deliberately avoid shadowing the ``sum`` builtin.
        total = self.sum(split_every=split_every)
        n_obs = self.count(split_every=split_every)
        return total / n_obs

    def unique_k(self, k, split_every=None):
        """Build a reduction using ``M.unique_k`` per chunk and ``unique_k_agg``.

        ``k`` is passed through to ``reduction`` as a keyword argument, and
        the result meta is this collection's own ``_meta``.
        """
        return reduction(
            self,
            k=k,
            split_every=split_every,
            chunk=M.unique_k,
            aggregate=unique_k_agg,
            token="unique-k",
            meta=self._meta,
        )

    @derived_from(pd.DataFrame)
    def var(
        self,
        axis=None,
        skipna=True,
        ddof=1,
        split_every=False,
        dtype=None,
        out=None,
    ):
        # Variance along ``axis``. ``dtype`` is accepted but never read in
        # this body; ``out`` is handed to ``handle_out`` with the result.
        # NOTE(review): comments are used instead of a docstring because the
        # ``derived_from`` decorator may manage ``__doc__`` -- confirm.
        axis = self._validate_axis(axis)
        meta = self._meta_nonempty.var(axis=axis, skipna=skipna)
        token = self._token_prefix + "var"

        if axis == 1:
            # Row-wise case: apply ``M.var`` to each partition independently.
            per_partition = map_partitions(
                M.var,
                self,
                meta=meta,
                token=token,
                axis=axis,
                skipna=skipna,
                ddof=ddof,
            )
            return handle_out(out, per_partition)

        # Otherwise combine three reductions over the numeric data (sum, sum
        # of squares, count) through ``var_aggregate``.
        numeric = self._get_numeric_data()
        total = 1.0 * numeric.sum(skipna=skipna, split_every=split_every)
        total_sq = 1.0 * (numeric ** 2).sum(skipna=skipna, split_every=split_every)
        n_obs = numeric.count(split_every=split_every)
        combined = map_partitions(
            var_aggregate,
            total_sq,
            total,
            n_obs,
            token=token,
            meta=meta,
            ddof=ddof,
        )
        if isinstance(self, DataFrame):
            # For a DataFrame input the divisions are set to the smallest and
            # largest column labels.
            combined.divisions = (min(self.columns), max(self.columns))
        return handle_out(out, combined)

    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    dt = CachedAccessor("dt", DatetimeAccessor)
    cat = CachedAccessor("cat", CategoricalAccessor)
class Series(_Frame):
    """Series collection that declares ``gd.Series`` as its partition type."""

    _partition_type = gd.Series

    @property
    def dtype(self):
        """The dtype reported by this collection's ``_meta``."""
        return self._meta.dtype

    def astype(self, dtype):
        """Return ``self`` when ``dtype`` equals the current dtype.

        Otherwise map ``M.astype`` over the partitions with ``dtype``.
        """
        return (
            self
            if dtype == self.dtype
            else self.map_partitions(M.astype, dtype=dtype)
        )

    def sum(self, split_every=False):
        """Reduction: ``M.sum`` per chunk, combined with ``np.sum``."""
        return reduction(
            self,
            split_every=split_every,
            chunk=M.sum,
            aggregate=np.sum,
            meta=self.dtype,
        )

    def count(self, split_every=False):
        """Reduction: ``M.count`` per chunk, combined with ``np.sum``."""
        return reduction(
            self,
            split_every=split_every,
            chunk=M.count,
            aggregate=np.sum,
            meta="i8",
        )

    def mean(self, split_every=False):
        """Return ``self.sum(...) / self.count(...)``."""
        # Local names deliberately avoid shadowing the ``sum`` builtin.
        total = self.sum(split_every=split_every)
        n_obs = self.count(split_every=split_every)
        return total / n_obs

    def var(self, ddof=1, split_every=False):
        """Combine three reductions through ``var_aggregate``.

        The inputs are a ``sum_of_squares`` reduction, the sum and the
        count; ``ddof`` is forwarded to ``var_aggregate`` and the result
        meta is declared as ``"f8"``.
        """
        total_sq = reduction(
            self,
            split_every=split_every,
            chunk=sum_of_squares,
            aggregate=np.sum,
            meta="f8",
        )
        total = self.sum(split_every=split_every)
        n_obs = self.count(split_every=split_every)
        return map_partitions(
            var_aggregate, total_sq, total, n_obs, ddof=ddof, meta="f8"
        )

    def std(self, ddof=1, split_every=False):
        """Apply ``np.sqrt`` to the result of :meth:`var`."""
        variance = self.var(ddof=ddof, split_every=split_every)
        return map_partitions(np.sqrt, variance, dtype=np.float64)

    def min(self, split_every=False):
        """Reduction: ``M.min`` per chunk, combined with ``np.min``."""
        return reduction(
            self,
            split_every=split_every,
            chunk=M.min,
            aggregate=np.min,
            meta=self.dtype,
        )

    def max(self, split_every=False):
        """Reduction: ``M.max`` per chunk, combined with ``np.max``."""
        return reduction(
            self,
            split_every=split_every,
            chunk=M.max,
            aggregate=np.max,
            meta=self.dtype,
        )

    def ceil(self):
        """Map ``M.ceil`` over the partitions."""
        return self.map_partitions(M.ceil)

    def floor(self):
        """Map ``M.floor`` over the partitions."""
        return self.map_partitions(M.floor)

    def fillna(self, value):
        """Map ``M.fillna`` over the partitions with ``value``.

        Raises:
            TypeError: if ``np.can_cast(value, self.dtype)`` is false.
        """
        # NOTE(review): ``np.can_cast`` behaviour with plain Python scalars
        # differs between NumPy versions -- confirm against the pinned one.
        if np.can_cast(value, self.dtype):
            return self.map_partitions(M.fillna, value, meta=self)
        raise TypeError("fill value must match dtype of series")

    def nlargest(self, n=5, split_every=None):
        """Reduction: ``M.nlargest`` per chunk, combined with ``nlargest_agg``.

        ``n`` is passed through to ``reduction`` as a keyword argument.
        """
        return reduction(
            self,
            n=n,
            split_every=split_every,
            chunk=M.nlargest,
            aggregate=nlargest_agg,
            token="series-nlargest",
            meta=self._meta,
        )

    def nsmallest(self, n=5, split_every=None):
        """Reduction: ``M.nsmallest`` per chunk, combined with ``nsmallest_agg``.

        ``n`` is passed through to ``reduction`` as a keyword argument.
        """
        return reduction(
            self,
            n=n,
            split_every=split_every,
            chunk=M.nsmallest,
            aggregate=nsmallest_agg,
            token="series-nsmallest",
            meta=self._meta,
        )

    def unique_k(self, k, split_every=None):
        """Reduction: ``M.unique_k`` per chunk, combined with ``unique_k_agg``.

        ``k`` is passed through to ``reduction`` as a keyword argument.
        """
        return reduction(
            self,
            k=k,
            split_every=split_every,
            chunk=M.unique_k,
            aggregate=unique_k_agg,
            token="unique-k",
            meta=self._meta,
        )

    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    dt = CachedAccessor("dt", DatetimeAccessor)
    cat = CachedAccessor("cat", CategoricalAccessor)