예제 #1
0
class Series(_Frame, dd.core.Series):
    """Series collection built on ``dd.core.Series`` with ``cudf.Series`` partitions."""

    _partition_type = cudf.Series

    def count(self, split_every=False):
        """Run ``M.count`` on each partition and combine the results with ``np.sum``.

        ``split_every`` is forwarded unchanged to ``reduction``.
        """
        return reduction(
            self,
            chunk=M.count,
            aggregate=np.sum,
            split_every=split_every,
            meta="i8",
        )

    def mean(self, split_every=False):
        """Return ``self.sum(...)`` divided by ``self.count(...)``.

        Both reductions receive the same ``split_every`` value.
        """
        total = self.sum(split_every=split_every)
        size = self.count(split_every=split_every)
        return total / size

    def unique_k(self, k, split_every=None):
        """Reduce with ``M.unique_k`` per partition and ``unique_k_agg`` to combine.

        ``k`` is handed to ``reduction`` as an extra keyword argument; the
        result metadata is ``self._meta``.
        """
        return reduction(
            self,
            chunk=M.unique_k,
            aggregate=unique_k_agg,
            meta=self._meta,
            token="unique-k",
            split_every=split_every,
            k=k,
        )

    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    # Exposed as ``series.dt`` and ``series.cat`` respectively.
    dt = CachedAccessor("dt", DatetimeAccessor)
    cat = CachedAccessor("cat", CategoricalAccessor)
예제 #2
0
class Series(_Frame, dd.core.Series):
    """Series collection built on ``dd.core.Series`` with ``cudf.Series`` partitions."""

    _partition_type = cudf.Series

    def count(self, split_every=False):
        """Run ``M.count`` on each partition and combine the results with ``np.sum``.

        ``split_every`` is forwarded unchanged to ``reduction``.
        """
        return reduction(
            self,
            chunk=M.count,
            aggregate=np.sum,
            split_every=split_every,
            meta="i8",
        )

    def mean(self, split_every=False):
        """Return ``self.sum(...)`` divided by ``self.count(...)``.

        Both reductions receive the same ``split_every`` value.
        """
        total = self.sum(split_every=split_every)
        size = self.count(split_every=split_every)
        return total / size

    def unique_k(self, k, split_every=None):
        """Reduce with ``M.unique_k`` per partition and ``unique_k_agg`` to combine.

        ``k`` is handed to ``reduction`` as an extra keyword argument; the
        result metadata is ``self._meta``.
        """
        return reduction(
            self,
            chunk=M.unique_k,
            aggregate=unique_k_agg,
            meta=self._meta,
            token="unique-k",
            split_every=split_every,
            k=k,
        )

    @derived_from(pd.DataFrame)
    def var(self, axis=None, skipna=True, ddof=1, split_every=False, dtype=None, out=None):
        # NOTE(review): ``derived_from(pd.DataFrame)`` presumably supplies the
        # docstring, so none is written here. ``dtype`` is accepted but never
        # read in this body.
        axis = self._validate_axis(axis)
        meta = self._meta_nonempty.var(axis=axis, skipna=skipna)

        if axis == 1:
            # axis == 1: apply ``M.var`` to every partition independently.
            per_partition = map_partitions(
                M.var,
                self,
                meta=meta,
                token=self._token_prefix + 'var',
                axis=axis,
                skipna=skipna,
                ddof=ddof,
            )
            return handle_out(out, per_partition)

        # Any other axis: feed the sum of squares, the sum and the count of
        # the numeric data to ``var_aggregate``.
        numeric = self._get_numeric_data()
        total = 1.0 * numeric.sum(skipna=skipna, split_every=split_every)
        total_sq = 1.0 * (numeric ** 2).sum(skipna=skipna, split_every=split_every)
        size = numeric.count(split_every=split_every)
        token = self._token_prefix + 'var'
        combined = map_partitions(
            var_aggregate,
            total_sq,
            total,
            size,
            token=token,
            meta=meta,
            ddof=ddof,
        )
        if isinstance(self, DataFrame):
            # For DataFrame inputs the divisions become the smallest and
            # largest column labels.
            combined.divisions = (min(self.columns), max(self.columns))
        return handle_out(out, combined)

    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    # Exposed as ``series.dt`` and ``series.cat`` respectively.
    dt = CachedAccessor("dt", DatetimeAccessor)
    cat = CachedAccessor("cat", CategoricalAccessor)
예제 #3
0
파일: core.py 프로젝트: tym1062/dask-cudf
class Series(_Frame):
    """``_Frame`` subclass whose partition type is ``gd.Series``."""

    _partition_type = gd.Series

    @property
    def dtype(self):
        """The dtype reported by ``self._meta``."""
        return self._meta.dtype

    def astype(self, dtype):
        """Return ``self`` when ``dtype`` equals the current dtype, else cast each partition."""
        already_matches = dtype == self.dtype
        if already_matches:
            return self
        return self.map_partitions(M.astype, dtype=dtype)

    def sum(self, split_every=False):
        """Run ``M.sum`` on each partition and combine the results with ``np.sum``."""
        return reduction(
            self,
            chunk=M.sum,
            aggregate=np.sum,
            split_every=split_every,
            meta=self.dtype,
        )

    def count(self, split_every=False):
        """Run ``M.count`` on each partition and combine the results with ``np.sum``."""
        return reduction(
            self,
            chunk=M.count,
            aggregate=np.sum,
            split_every=split_every,
            meta='i8',
        )

    def mean(self, split_every=False):
        """Return ``self.sum(...)`` divided by ``self.count(...)``."""
        total = self.sum(split_every=split_every)
        size = self.count(split_every=split_every)
        return total / size

    def var(self, ddof=1, split_every=False):
        """Combine sum of squares, sum and count through ``var_aggregate``.

        ``ddof`` is forwarded to ``var_aggregate``; the result metadata is
        ``'f8'``.
        """
        squares_total = reduction(
            self,
            chunk=sum_of_squares,
            aggregate=np.sum,
            split_every=split_every,
            meta='f8',
        )
        total = self.sum(split_every=split_every)
        size = self.count(split_every=split_every)
        return map_partitions(
            var_aggregate,
            squares_total,
            total,
            size,
            ddof=ddof,
            meta='f8',
        )

    def std(self, ddof=1, split_every=False):
        """Apply ``np.sqrt`` to the result of ``self.var``."""
        variance = self.var(ddof=ddof, split_every=split_every)
        return map_partitions(np.sqrt, variance, dtype=np.float64)

    def min(self, split_every=False):
        """Run ``M.min`` on each partition and combine the results with ``np.min``."""
        return reduction(
            self,
            chunk=M.min,
            aggregate=np.min,
            split_every=split_every,
            meta=self.dtype,
        )

    def max(self, split_every=False):
        """Run ``M.max`` on each partition and combine the results with ``np.max``."""
        return reduction(
            self,
            chunk=M.max,
            aggregate=np.max,
            split_every=split_every,
            meta=self.dtype,
        )

    def ceil(self):
        """Apply ``M.ceil`` to every partition."""
        return self.map_partitions(M.ceil)

    def floor(self):
        """Apply ``M.floor`` to every partition."""
        return self.map_partitions(M.floor)

    def fillna(self, value):
        """Apply ``M.fillna`` with ``value`` to every partition.

        Raises ``TypeError`` when ``np.can_cast(value, self.dtype)`` is false.
        """
        # NOTE(review): NumPy >= 2.0 documents that np.can_cast no longer
        # accepts Python scalars -- confirm the supported NumPy range.
        castable = np.can_cast(value, self.dtype)
        if not castable:
            raise TypeError("fill value must match dtype of series")
        return self.map_partitions(M.fillna, value, meta=self)

    def nlargest(self, n=5, split_every=None):
        """Reduce with ``M.nlargest`` per partition and ``nlargest_agg`` to combine.

        ``n`` is handed to ``reduction`` as an extra keyword argument.
        """
        return reduction(
            self,
            chunk=M.nlargest,
            aggregate=nlargest_agg,
            meta=self._meta,
            token='series-nlargest',
            split_every=split_every,
            n=n,
        )

    def nsmallest(self, n=5, split_every=None):
        """Reduce with ``M.nsmallest`` per partition and ``nsmallest_agg`` to combine.

        ``n`` is handed to ``reduction`` as an extra keyword argument.
        """
        return reduction(
            self,
            chunk=M.nsmallest,
            aggregate=nsmallest_agg,
            meta=self._meta,
            token='series-nsmallest',
            split_every=split_every,
            n=n,
        )

    def unique_k(self, k, split_every=None):
        """Reduce with ``M.unique_k`` per partition and ``unique_k_agg`` to combine.

        ``k`` is handed to ``reduction`` as an extra keyword argument.
        """
        return reduction(
            self,
            chunk=M.unique_k,
            aggregate=unique_k_agg,
            meta=self._meta,
            token='unique-k',
            split_every=split_every,
            k=k,
        )

    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    # Exposed as ``series.dt`` and ``series.cat`` respectively.
    dt = CachedAccessor("dt", DatetimeAccessor)
    cat = CachedAccessor("cat", CategoricalAccessor)