예제 #1
0
파일: utils.py 프로젝트: lqtruong/cudf
def scalar_broadcast_to(scalar, size, dtype=None):
    """Build a column of ``size`` entries from a single host scalar.

    Parameters
    ----------
    scalar
        Value to repeat. ``None`` and NaT ``np.datetime64`` /
        ``np.timedelta64`` values take the masked-column path, a
        ``pd.Categorical`` is handled separately, and anything else is
        first passed through ``to_cudf_compatible_scalar``.
    size
        Requested length. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. When omitted, the masked path falls back to
        ``"object"`` and the plain-value path uses the dtype of the
        converted scalar.

    Returns
    -------
    The column produced by whichever branch matches ``scalar``.
    """
    length = size[0] if isinstance(size, (tuple, list)) else size

    # None and NaT are both treated as "no value".
    missing = scalar is None
    if not missing and isinstance(scalar, (np.datetime64, np.timedelta64)):
        missing = np.isnat(scalar)
    if missing:
        null_dtype = "object" if dtype is None else dtype
        return column.column_empty(length, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is not None:
            # Broadcast the first category, then cast to the requested dtype.
            first_category = scalar.categories[0]
            return scalar_broadcast_to(first_category, length).astype(dtype)
        return _categorical_scalar_broadcast_to(scalar, length)

    host_value = to_cudf_compatible_scalar(scalar, dtype=dtype)
    value_dtype = host_value.dtype

    if np.dtype(value_dtype).kind not in ("O", "U"):
        filled = column.column_empty(length, dtype=value_dtype)
        # Skip the assignment entirely for a zero-length column.
        if filled.size != 0:
            filled.data_array_view[:] = host_value
        return filled

    # Object/unicode kinds: make a one-row string column and gather row 0
    # once per requested entry.
    zeros = column.full(length, 0, dtype="int32")
    single_row = column.as_column([host_value], dtype="str")
    return single_row[zeros]
예제 #2
0
 def _apply_agg_series(self, sr, agg_name):
     """Run the rolling aggregation ``agg_name`` over the series ``sr``.

     An ``int`` window is forwarded as a fixed window size with no
     per-row window columns. Any other window is converted with
     ``as_column`` and used as the preceding-window column, paired with
     a following-window column filled with zeros of the same size and
     dtype; the fixed window argument is then ``None``.

     Returns the result of ``sr._from_data`` built from the aggregated
     column under ``sr.name`` together with ``sr._index``.
     """
     source = sr._column
     if isinstance(self.window, int):
         preceding = following = None
         fixed_window = self.window
     else:
         preceding = as_column(self.window)
         following = column.full(
             self.window.size, 0, dtype=self.window.dtype
         )
         fixed_window = None

     aggregated = libcudf.rolling.rolling(
         source,
         preceding,
         following,
         fixed_window,
         self.min_periods,
         self.center,
         agg_name,
     )
     return sr._from_data({sr.name: aggregated}, sr._index)
예제 #3
0
    def _apply_agg_column(self, source_column, agg_name):
        """Apply the rolling aggregation ``agg_name`` to ``source_column``.

        The window arguments handed to ``libcudf.rolling.rolling`` depend
        on the type of ``self.window``:

        * ``int`` -- passed as the fixed ``window``; both window columns
          are ``None``.
        * ``BaseIndexer`` -- ``get_window_bounds`` supplies ``start`` and
          ``end``; with ``idx`` the row positions, the preceding column is
          ``idx - start + 1`` and the following column is ``end - idx - 1``,
          both as int32.
        * anything else -- converted with ``as_column`` and used as the
          preceding column, with a zero-filled following column of the
          same size and dtype.

        ``min_periods`` falls back to 1 when ``self.min_periods`` is falsy.
        """
        effective_min_periods = self.min_periods or 1
        window_spec = self.window

        if isinstance(window_spec, int):
            pre_col = fwd_col = None
            fixed_window = window_spec
        elif isinstance(window_spec, BaseIndexer):
            # The indexer receives the raw self.min_periods, not the
            # defaulted value computed above.
            bounds = window_spec.get_window_bounds(
                num_values=len(self.obj),
                min_periods=self.min_periods,
                center=self.center,
                closed=None,
            )
            start_col = as_column(bounds[0], dtype="int32")
            end_col = as_column(bounds[1], dtype="int32")

            positions = cudf.core.column.arange(len(start_col))
            pre_offsets = positions - start_col + cudf.Scalar(1, "int32")
            pre_col = pre_offsets.astype("int32")
            fwd_offsets = end_col - positions - cudf.Scalar(1, "int32")
            fwd_col = fwd_offsets.astype("int32")
            fixed_window = None
        else:
            pre_col = as_column(window_spec)
            fwd_col = column.full(
                window_spec.size, 0, dtype=window_spec.dtype
            )
            fixed_window = None

        return libcudf.rolling.rolling(
            source_column=source_column,
            pre_column_window=pre_col,
            fwd_column_window=fwd_col,
            window=fixed_window,
            min_periods=effective_min_periods,
            center=self.center,
            op=agg_name,
            agg_params=self.agg_params,
        )
예제 #4
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Build a column of ``size`` entries from a single host scalar.

    Parameters
    ----------
    scalar
        Value to repeat. Values for which
        ``cudf._lib.scalar._is_null_host_scalar`` is true take the
        masked-column path; ``pd.Categorical`` and ``decimal.Decimal``
        have dedicated branches; anything else is first passed through
        ``to_cudf_compatible_scalar``.
    size
        Requested length. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. When omitted, the masked path falls back to
        ``"object"``, the decimal path derives a ``Decimal64Dtype`` from
        the value, and the plain-value path uses the dtype of the
        converted scalar.

    Returns
    -------
    The column produced by whichever branch matches ``scalar``.
    """
    length = size[0] if isinstance(size, (tuple, list)) else size

    if cudf._lib.scalar._is_null_host_scalar(scalar):
        null_dtype = "object" if dtype is None else dtype
        return column.column_empty(length, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is not None:
            # Broadcast the first category, then cast to the requested dtype.
            first_category = scalar.categories[0]
            return scalar_broadcast_to(first_category, length).astype(dtype)
        return _categorical_scalar_broadcast_to(scalar, length)

    if isinstance(scalar, decimal.Decimal):
        decimal_dtype = dtype
        if decimal_dtype is None:
            decimal_dtype = cudf.Decimal64Dtype._from_decimal(scalar)
        decimal_col = column.column_empty(length, dtype=decimal_dtype)
        # Decimal values are assigned through the column itself rather
        # than through data_array_view.
        if decimal_col.size != 0:
            decimal_col[:] = scalar
        return decimal_col

    host_value = to_cudf_compatible_scalar(scalar, dtype=dtype)
    value_dtype = host_value.dtype

    if cudf.dtype(value_dtype).kind not in ("O", "U"):
        filled = column.column_empty(length, dtype=value_dtype)
        # Skip the assignment entirely for a zero-length column.
        if filled.size != 0:
            filled.data_array_view[:] = host_value
        return filled

    # Object/unicode kinds: make a one-row string column and gather row 0
    # once per requested entry.
    zeros = column.full(length, 0, dtype="int32")
    single_row = column.as_column([host_value], dtype="str")
    return single_row[zeros]