def seconds(self):
    """
    Number of seconds (>= 0 and less than 1 day).

    Returns
    -------
    NumericalColumn
    """
    # First remove the whole-day part with a modulo against one day,
    # then integer-divide the nanosecond remainder by one second to
    # obtain the seconds component of each element.
    one_day = as_device_scalar(
        np.timedelta64(_numpy_to_pandas_conversion["D"], "ns")
    )
    one_second = as_device_scalar(
        np.timedelta64(_numpy_to_pandas_conversion["s"], "ns")
    )
    return (self % one_day) // one_second
def nanoseconds(self):
    """
    Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

    Returns
    -------
    NumericalColumn
    """
    # Remove everything at microsecond granularity and above via a
    # modulo against one microsecond, then divide the remainder by one
    # nanosecond to turn it into a plain integer column.
    one_microsecond = as_device_scalar(
        np.timedelta64(_numpy_to_pandas_conversion["us"], "ns")
    )
    one_nanosecond = as_device_scalar(
        np.timedelta64(_numpy_to_pandas_conversion["ns"], "ns")
    )
    return (self % one_microsecond) // one_nanosecond
def days(self):
    """
    Number of days for each element.

    Returns
    -------
    NumericalColumn
    """
    # Integer-divide the nanosecond values by the length of one day.
    day_length = as_device_scalar(
        np.timedelta64(_numpy_to_pandas_conversion["D"], "ns")
    )
    return self // day_length
def quantile(self, q, interpolation, exact):
    """
    Compute the quantile(s) of this column via its numerical view.

    A scalar ``q`` yields a host-side ``pd.Timestamp``; a list-like
    ``q`` yields a column of ``datetime64[ns]`` values.
    """
    numeric_result = self.as_numerical.quantile(
        q=q, interpolation=interpolation, exact=exact
    )
    if isinstance(q, Number):
        # Single quantile: materialize as a pandas Timestamp in this
        # column's native time unit.
        return pd.Timestamp(numeric_result, unit=self.time_unit)
    # Multiple quantiles: scale the integer results up to nanoseconds
    # before reinterpreting them as datetimes.
    scale = as_device_scalar(_numpy_to_pandas_conversion[self.time_unit])
    return (numeric_result * scale).astype("datetime64[ns]")
def _fill(
    self,
    fill_value: ScalarLike,
    begin: int,
    end: int,
    inplace: bool = False,
) -> "column.ColumnBase":
    """
    Fill the codes in the half-open range [begin, end) with the code
    of ``fill_value``, either in place or on a copy.
    """
    # Empty or out-of-bounds range: nothing to do.
    if end <= begin or begin >= self.size:
        return self if inplace else self.copy()

    # Encode first so an invalid fill value raises before any data is
    # touched or copied.
    fill_code = self._encode(fill_value)
    device_fill = as_device_scalar(fill_code, self.codes.dtype)

    target = self if inplace else self.copy()
    libcudf.filling.fill_in_place(target.codes, begin, end, device_fill)
    return target
def _binary_op_truediv(self, rhs):
    """
    Prepare the operands and output dtype for true division.

    timedelta / timedelta produces a dimensionless float64 result;
    timedelta / numeric keeps this column's timedelta dtype. Any other
    right-hand dtype is rejected.
    """
    lhs = self
    if pd.api.types.is_timedelta64_dtype(rhs.dtype):
        # Both sides are timedeltas: normalize to a common resolution,
        # then divide as floats.
        common_dtype = determine_out_dtype(self.dtype, rhs.dtype)
        lhs = lhs.astype(common_dtype).astype("float64")

        if not isinstance(rhs, (cudf.Scalar, DeviceScalar)):
            rhs = rhs.astype(common_dtype).astype("float64")
        elif rhs.is_valid():
            rhs = rhs.value.astype(common_dtype).astype("float64")
        else:
            # Null scalar divisor: propagate as a null float scalar.
            rhs = as_device_scalar(None, "float64")

        out_dtype = np.dtype("float64")
    elif rhs.dtype.kind in ("f", "i", "u"):
        # Dividing a timedelta by a plain number scales it but keeps
        # the timedelta dtype.
        out_dtype = self.dtype
    else:
        raise TypeError(f"Division of {self.dtype} with {rhs.dtype} "
                        f"cannot be performed.")

    return lhs, rhs, out_dtype
def to_datetime(
    arg,
    errors="raise",
    dayfirst=False,
    yearfirst=False,
    utc=None,
    format=None,
    exact=True,
    unit="ns",
    infer_datetime_format=False,
    origin="unix",
    cache=True,
):
    """
    Convert argument to datetime.

    Parameters
    ----------
    arg : int, float, str, datetime, list, tuple, 1-d array,
        Series DataFrame/dict-like
        The object to convert to a datetime.
    errors : {'ignore', 'raise', 'coerce', 'warn'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaT.
        - If 'warn' : prints last exceptions as warnings and
            return the input.
        - If 'ignore', then invalid parsing will return the input.
    dayfirst : bool, default False
        Specify a date parse order if `arg` is str or its list-likes.
        If True, parses dates with the day first, eg 10/11/12
        is parsed as 2012-11-10. Warning: dayfirst=True is not strict,
        but will prefer to parse with day first (this is a known bug,
        based on dateutil behavior).
    format : str, default None
        The strftime to parse time, eg "%d/%m/%Y", note that "%f" will
        parse all the way up to nanoseconds. See strftime documentation
        for more information on choices:
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
    unit : str, default 'ns'
        The unit of the arg (D,s,ms,us,ns) denote the unit, which is an
        integer or float number. This will be based off the
        origin(unix epoch start). Example, with unit='ms' and
        origin='unix' (the default), this would calculate the number
        of milliseconds to the unix epoch start.
    infer_datetime_format : bool, default False
        If True and no `format` is given, attempt to infer the format
        of the datetime strings, and if it can be inferred, switch to a
        faster method of parsing them. In some cases this can increase
        the parsing speed by ~5-10x.

    Returns
    -------
    datetime
        If parsing succeeded.
        Return type depends on input:
        - list-like: DatetimeIndex
        - Series: Series of datetime64 dtype
        - scalar: Timestamp

    Examples
    --------
    Assembling a datetime from multiple columns of a DataFrame. The
    keys can be common abbreviations like
    ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or
    plurals of the same

    >>> import cudf
    >>> df = cudf.DataFrame({'year': [2015, 2016],
    ...                    'month': [2, 3],
    ...                    'day': [4, 5]})
    >>> cudf.to_datetime(df)
    0   2015-02-04
    1   2016-03-05
    dtype: datetime64[ns]
    >>> cudf.to_datetime(1490195805, unit='s')
    numpy.datetime64('2017-03-22T15:16:45.000000000')
    >>> cudf.to_datetime(1490195805433502912, unit='ns')
    numpy.datetime64('1780-11-20T01:02:30.494253056')
    """
    # Nothing to convert.
    if arg is None:
        return None

    # Several pandas options are accepted in the signature for API
    # compatibility but are not implemented yet — reject explicitly
    # rather than silently ignoring them.
    if exact is False:
        raise NotImplementedError("exact support is not yet implemented")

    if origin != "unix":
        raise NotImplementedError("origin support is not yet implemented")

    if yearfirst:
        raise NotImplementedError("yearfirst support is not yet implemented")

    # All parsing failures funnel into the except block below, where
    # `errors` decides whether to raise, warn, ignore, or coerce.
    try:
        if isinstance(arg, cudf.DataFrame):
            # DataFrame input assembles a datetime from per-component
            # columns. we require at least Ymd
            required = ["year", "month", "day"]
            req = list(set(required) - set(arg._data.names))
            if len(req):
                req = ",".join(req)
                raise ValueError(
                    f"to assemble mappings requires at least that "
                    f"[year, month, day] be specified: [{req}] "
                    f"is missing"
                )

            # replace passed column name with values in _unit_map
            # NOTE: this rebinds `unit` from the scalar parameter to a
            # dict mapping column name -> canonical unit key.
            unit = {k: get_units(k) for k in arg._data.names}
            unit_rev = {v: k for k, v in unit.items()}

            # keys we don't recognize
            excess = set(unit_rev.keys()) - set(_unit_map.values())
            if len(excess):
                excess = ",".join(excess)
                raise ValueError(
                    f"extra keys have been passed to the "
                    f"datetime assemblage: [{excess}]"
                )

            # Build a "YYYY-MM-DD" string column from the date parts
            # and parse it at second resolution.
            new_series = (
                arg[unit_rev["year"]].astype("str")
                + "-"
                + arg[unit_rev["month"]].astype("str").str.zfill(2)
                + "-"
                + arg[unit_rev["day"]].astype("str").str.zfill(2)
            )
            format = "%Y-%m-%d"
            col = new_series._column.as_datetime_column(
                "datetime64[s]", format=format
            )

            # If any sub-day component is float (or a string column
            # that is not purely integral), re-parse the date at
            # nanosecond resolution so fractional parts survive.
            for u in ["h", "m", "s", "ms", "us", "ns"]:
                value = unit_rev.get(u)
                if value is not None and value in arg:
                    arg_col = arg._data[value]
                    if arg_col.dtype.kind in ("f"):
                        col = new_series._column.as_datetime_column(
                            "datetime64[ns]", format=format
                        )
                        break
                    elif arg_col.dtype.kind in ("O"):
                        if not cpp_is_integer(arg_col).all():
                            col = new_series._column.as_datetime_column(
                                "datetime64[ns]", format=format
                            )
                            break

            # Accumulate the time-of-day components, each scaled to
            # the resolution of `col` (seconds or nanoseconds).
            times_column = None
            for u in ["h", "m", "s", "ms", "us", "ns"]:
                value = unit_rev.get(u)
                if value is not None and value in arg:
                    current_col = arg._data[value]
                    # If the arg[value] is of int or
                    # float dtype we don't want to type-cast
                    if current_col.dtype.kind in ("O"):
                        try:
                            current_col = current_col.astype(dtype="int64")
                        except ValueError:
                            current_col = current_col.astype(dtype="float64")

                    factor = as_device_scalar(
                        column.datetime._numpy_to_pandas_conversion[u]
                        / (
                            column.datetime._numpy_to_pandas_conversion["s"]
                            if np.datetime_data(col.dtype)[0] == "s"
                            else 1
                        )
                    )

                    if times_column is None:
                        times_column = current_col * factor
                    else:
                        times_column = times_column + (current_col * factor)
            if times_column is not None:
                # Add the accumulated time offset in integer space,
                # then reinterpret as the date column's dtype.
                col = (col.astype(dtype="int64") + times_column).astype(
                    dtype=col.dtype
                )
            return cudf.Series(col, index=arg.index)
        elif isinstance(arg, cudf.Index):
            col = arg._values
            col = _process_col(
                col=col,
                unit=unit,
                dayfirst=dayfirst,
                infer_datetime_format=infer_datetime_format,
                format=format,
            )
            return as_index(col, name=arg.name)
        elif isinstance(arg, cudf.Series):
            col = arg._column
            col = _process_col(
                col=col,
                unit=unit,
                dayfirst=dayfirst,
                infer_datetime_format=infer_datetime_format,
                format=format,
            )
            return cudf.Series(col, index=arg.index, name=arg.name)
        else:
            # Scalars and other list-likes go through a generic column.
            col = column.as_column(arg)
            col = _process_col(
                col=col,
                unit=unit,
                dayfirst=dayfirst,
                infer_datetime_format=infer_datetime_format,
                format=format,
            )

            if is_scalar(arg):
                return col[0]
            else:
                return as_index(col)
    except Exception as e:
        # Dispatch on the requested error policy.
        if errors == "raise":
            raise e
        elif errors == "warn":
            import traceback

            tb = traceback.format_exc()
            warnings.warn(tb)
        elif errors == "ignore":
            pass
        elif errors == "coerce":
            # NOTE(review): in the DataFrame path `unit` was rebound
            # to a dict above, so the `unit is None` guard presumably
            # targets the non-DataFrame paths — confirm intent.
            return np.datetime64("nat", "ns" if unit is None else unit)
        # 'warn' and 'ignore' fall through to returning the input.
        return arg
def _process_col(col, unit, dayfirst, infer_datetime_format, format):
    """
    Convert a column to a datetime column according to ``unit`` and
    ``format``, dispatching on the column's dtype kind.

    Float and integer inputs are interpreted as offsets from the unix
    epoch in ``unit``; string (object) inputs are parsed, recursing
    through the numeric path when ``unit`` is not nanoseconds.
    """
    # Already a datetime column: nothing to do.
    if col.dtype.kind == "M":
        return col
    # Timedelta columns cannot be reinterpreted as datetimes.
    elif col.dtype.kind == "m":
        raise TypeError(
            f"dtype {col.dtype} cannot be converted to {_unit_dtype_map[unit]}"
        )

    if col.dtype.kind in ("f"):
        # Scale float offsets from `unit` to nanoseconds.
        if unit not in (None, "ns"):
            factor = as_device_scalar(
                column.datetime._numpy_to_pandas_conversion[unit]
            )
            col = col * factor
        if format is not None:
            # Converting to int because,
            # pandas actually creates a datetime column
            # out of float values and then creates an
            # int column out of it to parse against `format`.
            # Instead we directly cast to int and perform
            # parsing against `format`.
            col = (
                col.astype("int")
                .astype("str")
                .as_datetime_column(
                    dtype="datetime64[us]"
                    if "%f" in format
                    else "datetime64[s]",
                    format=format,
                )
            )
        else:
            col = col.as_datetime_column(dtype="datetime64[ns]")

    # NOTE: deliberately a fresh `if` (not `elif`) — after the float
    # branch `col` is a datetime column, so this only fires for
    # genuinely integer input.
    if col.dtype.kind in ("i"):
        # Sub-second-resolution dtypes exist for s/ms/us/ns; coarser
        # units (days/hours/minutes) are rescaled to seconds first.
        if unit in ("D", "h", "m"):
            factor = as_device_scalar(
                column.datetime._numpy_to_pandas_conversion[unit]
                / column.datetime._numpy_to_pandas_conversion["s"]
            )
            col = col * factor

        if format is not None:
            # Parsing against a format requires string input.
            col = col.astype("str").as_datetime_column(
                dtype=_unit_dtype_map[unit], format=format
            )
        else:
            col = col.as_datetime_column(dtype=_unit_dtype_map[unit])

    elif col.dtype.kind in ("O"):
        if unit not in (None, "ns"):
            # Strings with an explicit non-ns unit are numeric offsets:
            # cast to a numeric dtype and recurse through the numeric
            # paths above.
            try:
                col = col.astype(dtype="int64")
            except ValueError:
                col = col.astype(dtype="float64")
            return _process_col(
                col=col,
                unit=unit,
                dayfirst=dayfirst,
                infer_datetime_format=infer_datetime_format,
                format=format,
            )
        else:
            # Plain datetime strings: infer a strftime format from the
            # first element when none was supplied.
            if infer_datetime_format and format is None:
                format = column.datetime.infer_format(
                    element=col[0],
                    dayfirst=dayfirst,
                )
            elif format is None:
                format = column.datetime.infer_format(element=col[0])
            col = col.as_datetime_column(
                dtype=_unit_dtype_map[unit],
                format=format,
            )
    return col
def components(self, index=None):
    """
    Return a Dataframe of the components of the Timedeltas.

    Returns
    -------
    DataFrame

    Examples
    --------
    >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s'))
    >>> s = cudf.Series([12231312123, 1231231231, 1123236768712, 2135656,
    ...     3244334234], dtype='timedelta64[ms]')
    >>> s
    0      141 days 13:35:12.123
    1        14 days 06:00:31.231
    2    13000 days 10:12:48.712
    3        0 days 00:35:35.656
    4        37 days 13:12:14.234
    dtype: timedelta64[ms]
    >>> s.dt.components
        days  hours  minutes  seconds  milliseconds  microseconds  nanoseconds
    0    141     13       35       12           123             0            0
    1     14      6        0       31           231             0            0
    2  13000     10       12       48           712             0            0
    3      0      0       35       35           656             0            0
    4     37     13       12       14           234             0            0
    """  # noqa: E501

    def _one(u):
        # Device scalar holding one `u` (e.g. one hour) in nanoseconds.
        return as_device_scalar(
            np.timedelta64(_numpy_to_pandas_conversion[u], "ns")
        )

    # Days have no larger unit above them, so they are a plain
    # integer division.
    parts = {"days": self // _one("D")}

    # Every remaining component is "strip the larger unit with a
    # modulo, then divide by the component's own size".
    breakdown = [
        ("hours", "D", "h"),
        ("minutes", "h", "m"),
        ("seconds", "m", "s"),
        ("milliseconds", "s", "ms"),
        ("microseconds", "ms", "us"),
        ("nanoseconds", "us", "ns"),
    ]
    for name, larger, smaller in breakdown:
        parts[name] = (self % _one(larger)) // _one(smaller)

    return cudf.DataFrame(data=parts, index=index)