def test_is_datetime_dtypes(self):
    """Exercise the datetime64 dtype predicates against dtype strings and
    against tz-naive and tz-aware ``date_range`` results."""
    naive = pd.date_range('20130101', periods=3)
    aware = pd.date_range('20130101', periods=3, tz='US/Eastern')
    unitless, nanos = 'datetime64', 'datetime64[ns]'

    # is_datetime64_dtype: everything here except the tz-aware index
    for candidate in (unitless, nanos, naive):
        assert is_datetime64_dtype(candidate)
    assert not is_datetime64_dtype(aware)

    # is_datetime64_ns_dtype: only the unit-less string is rejected
    assert not is_datetime64_ns_dtype(unitless)
    for candidate in (nanos, naive, aware):
        assert is_datetime64_ns_dtype(candidate)

    # is_datetime64_any_dtype: all four inputs qualify
    for candidate in (unitless, nanos, naive, aware):
        assert is_datetime64_any_dtype(candidate)

    # is_datetime64tz_dtype: only the tz-aware index qualifies
    for candidate in (unitless, nanos, naive):
        assert not is_datetime64tz_dtype(candidate)
    assert is_datetime64tz_dtype(aware)

    # tz-qualified dtype strings
    for zone in ('US/Eastern', 'UTC'):
        tz_dtype = 'datetime64[ns, {}]'.format(zone)
        assert not is_datetime64_dtype(tz_dtype)
        assert is_datetime64tz_dtype(tz_dtype)
        assert is_datetime64_ns_dtype(tz_dtype)
        assert is_datetime64_any_dtype(tz_dtype)
def test_compat(self):
    """The tz-aware dtype, and its string form, must satisfy the tz/any/ns
    datetime64 predicates but not the naive ``is_datetime64_dtype`` one."""
    accepting = (
        is_datetime64tz_dtype,
        is_datetime64_any_dtype,
        is_datetime64_ns_dtype,
    )
    for predicate in accepting:
        # dtype object first, then the equivalent string alias
        self.assertTrue(predicate(self.dtype))
        self.assertTrue(predicate('datetime64[ns, US/Eastern]'))

    self.assertFalse(is_datetime64_dtype(self.dtype))
    self.assertFalse(is_datetime64_dtype('datetime64[ns, US/Eastern]'))
def test_compat(self):
    """The tz-aware dtype, and its string form, must satisfy the tz/any/ns
    datetime64 predicates but not the naive ``is_datetime64_dtype`` one."""
    accepting = (
        is_datetime64tz_dtype,
        is_datetime64_any_dtype,
        is_datetime64_ns_dtype,
    )
    for predicate in accepting:
        # dtype object first, then the equivalent string alias
        assert predicate(self.dtype)
        assert predicate('datetime64[ns, US/Eastern]')

    assert not is_datetime64_dtype(self.dtype)
    assert not is_datetime64_dtype('datetime64[ns, US/Eastern]')
def __rsub__(self, other):
    """Reflected subtraction: evaluate ``other - self``.

    Raises
    ------
    TypeError
        If ``self`` is datetime-like and ``other`` carries a ``dtype`` that
        is not datetime-like.
    """
    if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
        # A raw datetime64 ndarray cannot have ``self`` subtracted from it,
        # so wrap it in a DatetimeIndex and perform the flipped operation.
        from pandas import DatetimeIndex

        wrapped = DatetimeIndex(other)
        return wrapped - self

    if is_datetime64_any_dtype(self) and hasattr(other, 'dtype'):
        if not is_datetime64_any_dtype(other):
            # GH#19959 datetime - datetime is well-defined as timedelta,
            # but any other type - datetime is not well-defined.
            template = "cannot subtract {cls} from {typ}"
            raise TypeError(template.format(cls=type(self).__name__,
                                            typ=type(other).__name__))

    difference = self - other
    return -difference
def __rsub__(self, other):
    """Reflected subtraction: evaluate ``other - self``.

    Raises
    ------
    TypeError
        If ``self`` is datetime-like and ``other`` carries a ``dtype`` that
        is not datetime-like.
    """
    if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
        # A raw datetime64 ndarray cannot have ``self`` subtracted from it,
        # so wrap it in a datetime array and perform the flipped operation.
        already_wrapped = isinstance(other, DatetimeLikeArrayMixin)
        if not already_wrapped:
            # Only wrap plain inputs; this avoids down-casting DatetimeIndex
            from pandas.core.arrays import DatetimeArrayMixin

            other = DatetimeArrayMixin(other)
        return other - self

    if is_datetime64_any_dtype(self) and hasattr(other, 'dtype'):
        if not is_datetime64_any_dtype(other):
            # GH#19959 datetime - datetime is well-defined as timedelta,
            # but any other type - datetime is not well-defined.
            template = "cannot subtract {cls} from {typ}"
            raise TypeError(template.format(cls=type(self).__name__,
                                            typ=type(other).__name__))

    difference = self - other
    return -difference
def test_is_datetime64_any_dtype():
    """``is_datetime64_any_dtype`` accepts naive and tz-aware datetime64
    types/arrays and rejects everything else."""
    # Non-datetime types and arrays are rejected
    assert not com.is_datetime64_any_dtype(int)
    assert not com.is_datetime64_any_dtype(str)
    assert not com.is_datetime64_any_dtype(np.array([1, 2]))
    assert not com.is_datetime64_any_dtype(np.array(['a', 'b']))

    # Naive datetime64, as a type and as an (empty) array
    assert com.is_datetime64_any_dtype(np.datetime64)
    assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64))

    # tz-aware dtype
    assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))

    # DatetimeIndex requires an explicit unit; a unit-less ``np.datetime64``
    # dtype is rejected by the constructor, so spell out nanoseconds.
    assert com.is_datetime64_any_dtype(
        pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
def astype(self, dtype, copy=True, how='start'):
    """Cast to ``dtype``.

    datetime64 targets (naive or tz-aware) are produced via
    ``to_timestamp(how=how)`` followed by ``tz_localize``; every other
    target is delegated to the parent class.
    """
    target = pandas_dtype(dtype)

    if not is_datetime64_any_dtype(target):
        # TODO: should probably raise on `how` here, so we don't ignore it.
        return super(PeriodIndex, self).astype(target, copy=copy)

    # 'how' is index-specific, isn't part of the EA interface.
    timestamps = self.to_timestamp(how=how)
    return timestamps.tz_localize(getattr(target, 'tz', None))
def astype(self, dtype, copy=True, how='start'):
    """Cast to ``dtype``.

    Integer, datetime64 and period targets are handled here; any other
    target is delegated to the parent class.
    """
    target = pandas_dtype(dtype)

    if is_integer_dtype(target):
        # Hand back the integer index, copied only on request
        ordinals = self._int64index
        if copy:
            return ordinals.copy()
        return ordinals

    if is_datetime64_any_dtype(target):
        # Convert to timestamps, then attach the requested timezone (if any)
        timestamps = self.to_timestamp(how=how)
        return timestamps.tz_localize(getattr(target, 'tz', None))

    if is_period_dtype(target):
        return self.asfreq(freq=target.freq)

    return super(PeriodIndex, self).astype(target, copy=copy)
def astype(self, dtype, copy=True, how='start'):
    """Cast to ``dtype``.

    datetime64 targets (naive or tz-aware) are special-cased through
    ``to_timestamp``; every other target is produced by casting the
    underlying ``_data`` and wrapping the result in an ``Index``.
    """
    target = pandas_dtype(dtype)

    if not is_datetime64_any_dtype(target):
        # No special case applies: fall back to astyping the values
        converted = self._data.astype(target, copy=copy)
        return Index(converted, name=self.name, dtype=target, copy=False)

    # 'how' is index-specific, isn't part of the EA interface.
    timestamps = self.to_timestamp(how=how)
    return timestamps.tz_localize(getattr(target, 'tz', None))
def __setitem__(self, key, value):
    """Set ``self[key] = value`` on both the left and right endpoints.

    Parameters
    ----------
    key : indexer applied to the underlying endpoint arrays
    value : NA scalar, interval scalar, or list-like of intervals

    Raises
    ------
    TypeError
        If ``value`` is neither NA, an interval, nor convertible to an
        ``IntervalArray``.
    """
    # na value: need special casing to set directly on numpy arrays
    needs_float_conversion = False
    if is_scalar(value) and isna(value):
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            needs_float_conversion = True
        elif is_datetime64_any_dtype(self.dtype.subtype):
            # need proper NaT to set directly on the numpy array
            value = np.datetime64('NaT')
        elif is_timedelta64_dtype(self.dtype.subtype):
            # need proper NaT to set directly on the numpy array
            value = np.timedelta64('NaT')
        value_left, value_right = value, value

    # scalar interval
    elif is_interval_dtype(value) or isinstance(value, ABCInterval):
        self._check_closed_matches(value, name="value")
        value_left, value_right = value.left, value.right

    else:
        # list-like of intervals
        try:
            array = IntervalArray(value)
            value_left, value_right = array.left, array.right
        except TypeError:
            # wrong type: not interval or NA
            msg = "'value' should be an interval type, got {} instead."
            raise TypeError(msg.format(type(value)))

    # Need to ensure that left and right are updated atomically, so we're
    # forced to copy, update the copy, and swap in the new values.
    left = self.left.copy(deep=True)
    if needs_float_conversion:
        left = left.astype('float')
    left.values[key] = value_left

    right = self.right.copy(deep=True)
    if needs_float_conversion:
        right = right.astype('float')
    right.values[key] = value_right

    # Rebind only after BOTH sides were built successfully; a failure while
    # preparing the right side must not leave a half-updated array behind.
    self._left = left
    self._right = right
def __array__(self, dtype=None, copy=True):
    """Materialise the sparse data as a dense NumPy array.

    When there are no gaps the stored values are returned directly.
    Otherwise a dense array of ``fill_value`` is built and the stored
    values are written at their positions. With ``dtype=None`` the result
    dtype comes from ``np.result_type``, falling back to ``object``.
    """
    filler = self.fill_value
    stored = self.sp_values

    if self.sp_index.ngaps == 0:
        # Compat for na dtype and int values.
        return stored

    if dtype is None:
        # Special-case datetime64 data filled with pandas NaT: pd.NaT cannot
        # be placed in a datetime64[ns] array, so use the NumPy NaT instead.
        if is_datetime64_any_dtype(stored.dtype) and filler is pd.NaT:
            filler = np.datetime64('NaT')

        # Can NumPy represent the combination? If not, `np.result_type`
        # raises TypeError and we settle for object.
        try:
            dtype = np.result_type(stored.dtype, filler)
        except TypeError:
            dtype = object

    dense = np.full(self.shape, filler, dtype=dtype)
    positions = self.sp_index.to_int_index().indices
    dense[positions] = stored
    return dense
def plot(
    trj: TrajaDataFrame,
    n_coords: Optional[int] = None,
    show_time: bool = False,
    accessor: Optional[traja.TrajaAccessor] = None,
    ax=None,
    **kwargs,
) -> matplotlib.collections.PathCollection:
    """Plot trajectory for single animal over period.

    Args:
      trj (:class:`traja.TrajaDataFrame`): trajectory
      n_coords (int, optional): Number of coordinates to plot
      show_time (bool): Show colormap as time
      accessor (:class:`~traja.accessor.TrajaAccessor`, optional): TrajaAccessor instance
      ax (:class:`~matplotlib.axes.Axes`): axes for plotting
      interactive (bool): show plot immediately
      **kwargs: additional keyword arguments to :meth:`matplotlib.axes.Axes.scatter`.
        The keys ``xlim``, ``ylim``, ``title``, ``time_units``, ``fps``,
        ``figsize``, ``s`` and ``invert_yaxis`` are consumed here and are
        not forwarded to ``scatter``.

    Returns:
        collection (:class:`~matplotlib.collections.PathCollection`): collection that was plotted

    """
    import matplotlib.patches as patches
    from matplotlib.path import Path

    after_plot_args, kwargs = _get_after_plot_args(**kwargs)

    GRAY = "#999999"
    xlim = kwargs.pop("xlim", None)
    ylim = kwargs.pop("ylim", None)
    if not xlim or not ylim:
        xlim, ylim = traja.trajectory._get_xylim(trj)

    title = kwargs.pop("title", None)
    time_units = kwargs.pop("time_units", "s")
    fps = kwargs.pop("fps", None)
    figsize = kwargs.pop("figsize", None)
    # Consume plot-level options BEFORE kwargs is forwarded to scatter();
    # otherwise scatter() receives them as unknown keyword arguments.
    invert_yaxis = kwargs.pop("invert_yaxis", None)
    marker_size = kwargs.pop("s", 1)

    coords = trj[["x", "y"]]
    time_col = traja.trajectory._get_time_col(trj)

    if time_col == "index":
        is_datetime = True
    else:
        is_datetime = is_datetime64_any_dtype(trj[time_col]) if time_col else False

    if n_coords is None:
        # Plot all coords
        start, end = 0, len(coords)
        verts = coords.iloc[start:end].values
    else:
        # Plot first `n_coords`
        verts = coords.iloc[:n_coords].values

    # Actual number of plotted points (may be fewer than requested)
    n_coords = len(verts)

    codes = [Path.MOVETO] + [Path.LINETO] * (len(verts) - 1)
    path = Path(verts, codes)

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
        fig.canvas.draw()

    patch = patches.PathPatch(path, edgecolor=GRAY, facecolor="none", lw=3, alpha=0.3)
    ax.add_patch(patch)

    xs, ys = zip(*verts)

    if time_col == "index":
        # DatetimeIndex determines color
        colors = [ind for ind, x in enumerate(trj.index[:n_coords])]
    elif time_col and time_col != "index":
        # `time_col` determines color
        colors = [ind for ind, x in enumerate(trj[time_col].iloc[:n_coords])]
    else:
        # Frame count determines color
        colors = trj.index[:n_coords]

    if time_col:
        # TODO: Calculate fps if not in datetime
        vmin = min(colors)
        vmax = max(colors)
        if is_datetime:
            # Show timestamps without units
            time_units = ""
    else:
        # Index/frame count is our only reference
        vmin = trj.index[0]
        vmax = trj.index[n_coords - 1]
        if not show_time:
            time_units = ""
    label = f"Time ({time_units})" if time_units else ""

    collection = ax.scatter(
        xs,
        ys,
        c=colors,
        s=marker_size,
        cmap=plt.cm.viridis,
        alpha=0.7,
        vmin=vmin,
        vmax=vmax,
        **kwargs,
    )

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    if invert_yaxis:
        # Invert the axes we actually drew on, which need not be plt.gca()
        ax.invert_yaxis()

    _label_axes(trj, ax)
    ax.set_title(title)
    ax.set_aspect("equal")

    # Number of color bar ticks
    CBAR_TICKS = 10 if n_coords > 20 else n_coords
    indices = np.linspace(0, n_coords - 1, CBAR_TICKS, endpoint=True, dtype=int)
    cbar = plt.colorbar(
        collection, fraction=0.046, pad=0.04, orientation="vertical", label=label
    )

    # Get colorbar labels from time
    if time_col == "index":
        if is_datetime64_any_dtype(trj.index):
            cbar_labels = (
                trj.index[indices].strftime("%Y-%m-%d %H:%M:%S").values.astype(str)
            )
        elif is_timedelta64_dtype(trj.index):
            if time_units in ("s", "", None):
                cbar_labels = [round(x, 2) for x in trj.index[indices].total_seconds()]
            else:
                logger.error("Time unit {} not yet implemented".format(time_units))
        else:
            raise NotImplementedError(
                "Indexing on {} is not yet implemented".format(type(trj.index))
            )
    elif time_col and is_timedelta64_dtype(trj[time_col]):
        cbar_labels = trj[time_col].iloc[indices].dt.total_seconds().values
        cbar_labels = ["%.2f" % number for number in cbar_labels]
    elif time_col and is_datetime:
        cbar_labels = (
            trj[time_col]
            .iloc[indices]
            .dt.strftime("%Y-%m-%d %H:%M:%S")
            .values.astype(str)
        )
    else:
        # Convert frames to time
        if time_col:
            cbar_labels = trj[time_col].iloc[indices].values
        else:
            cbar_labels = trj.index[indices].values
        cbar_labels = np.round(cbar_labels, 6)
        if fps is not None and fps > 0 and fps != 1 and show_time:
            cbar_labels = cbar_labels / fps

    cbar.set_ticks(indices)
    cbar.set_ticklabels(cbar_labels)
    plt.tight_layout()

    _process_after_plot_args(**after_plot_args)
    return collection
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
    """Apply the grouped cython function ``how`` of the given ``kind``.

    ``kind`` must be 'transform' or 'aggregate'. ``values`` is coerced to a
    dtype the cython function accepts, reshaped to 2-D when 1-D, and handed
    to ``self._aggregate`` or ``self._transform``.

    Returns a ``(result, names)`` tuple; ``names`` is produced by
    ``self._name_functions[how]`` when present, otherwise ``None``.

    Raises NotImplementedError for categorical values, for datetime64 /
    timedelta64 values combined with the unsupported ``how`` values listed
    below, and for arity > 1 on values that are not 1-D.
    """
    assert kind in ['transform', 'aggregate']

    # can we do this operation with our cython functions
    # if not raise NotImplementedError

    # we raise NotImplemented if this is an invalid operation
    # entirely, e.g. adding datetimes

    # categoricals are only 1d, so we
    # are not setup for dim transforming
    if is_categorical_dtype(values):
        raise NotImplementedError(
            "categoricals are not support in cython ops ATM")
    elif is_datetime64_any_dtype(values):
        if how in ['add', 'prod', 'cumsum', 'cumprod']:
            raise NotImplementedError(
                "datetime64 type does not support {} "
                "operations".format(how))
    elif is_timedelta64_dtype(values):
        if how in ['prod', 'cumprod']:
            raise NotImplementedError(
                "timedelta64 type does not support {} "
                "operations".format(how))

    # number of output columns per group for this `how` (defaults to 1)
    arity = self._cython_arity.get(how, 1)

    vdim = values.ndim
    swapped = False
    if vdim == 1:
        # promote to a single-column 2-D array
        values = values[:, None]
        out_shape = (self.ngroups, arity)
    else:
        if axis > 0:
            # bring the requested axis to the front; undone before returning
            swapped = True
            values = values.swapaxes(0, axis)
        if arity > 1:
            raise NotImplementedError("arity of more than 1 is not "
                                      "supported for the 'how' argument")
        out_shape = (self.ngroups,) + values.shape[1:]

    is_datetimelike = needs_i8_conversion(values.dtype)
    is_numeric = is_numeric_dtype(values.dtype)

    # coerce `values` to a dtype the cython function lookup can handle
    if is_datetimelike:
        values = values.view('int64')
        is_numeric = True
    elif is_bool_dtype(values.dtype):
        values = ensure_float64(values)
    elif is_integer_dtype(values):
        # we use iNaT for the missing value on ints
        # so pre-convert to guard this condition
        if (values == iNaT).any():
            values = ensure_float64(values)
        else:
            values = ensure_int64_or_float64(values)
    elif is_numeric and not is_complex_dtype(values):
        values = ensure_float64(values)
    else:
        values = values.astype(object)

    try:
        func = self._get_cython_function(
            kind, how, values, is_numeric)
    except NotImplementedError:
        if is_numeric:
            # retry the lookup once with float64 values
            values = ensure_float64(values)
            func = self._get_cython_function(
                kind, how, values, is_numeric)
        else:
            raise

    if how == 'rank':
        out_dtype = 'float'
    else:
        if is_numeric:
            # same kind and item size as the (coerced) input
            out_dtype = '{kind}{itemsize}'.format(
                kind=values.dtype.kind, itemsize=values.dtype.itemsize)
        else:
            out_dtype = 'object'

    labels, _, _ = self.group_info

    if kind == 'aggregate':
        result = _maybe_fill(np.empty(out_shape, dtype=out_dtype),
                             fill_value=np.nan)
        counts = np.zeros(self.ngroups, dtype=np.int64)
        result = self._aggregate(
            result, counts, values, labels, func, is_numeric,
            is_datetimelike, min_count)
    elif kind == 'transform':
        result = _maybe_fill(np.empty_like(values, dtype=out_dtype),
                             fill_value=np.nan)

        # TODO: min_count
        result = self._transform(
            result, values, labels, func, is_numeric, is_datetimelike,
            **kwargs)

    if is_integer_dtype(result) and not is_datetimelike:
        # iNaT marks missing entries in integer results; convert them to NaN
        mask = result == iNaT
        if mask.any():
            result = result.astype('float64')
            result[mask] = np.nan

    if (kind == 'aggregate' and
            self._filter_empty_groups and not counts.all()):
        # keep only the rows of groups with a non-zero count
        if result.ndim == 2:
            try:
                result = lib.row_bool_subset(
                    result, (counts > 0).view(np.uint8))
            except ValueError:
                result = lib.row_bool_subset_object(
                    ensure_object(result),
                    (counts > 0).view(np.uint8))
        else:
            result = result[counts > 0]

    if vdim == 1 and arity == 1:
        # undo the 1-D -> 2-D promotion done above
        result = result[:, 0]

    if how in self._name_functions:
        # TODO
        names = self._name_functions[how]()
    else:
        names = None

    if swapped:
        result = result.swapaxes(0, axis)

    return result, names
def read_file(
    filepath: str,
    id: Optional[str] = None,
    xcol: Optional[str] = None,
    ycol: Optional[str] = None,
    parse_dates: Union[str, bool] = False,
    xlim: Optional[tuple] = None,
    ylim: Optional[tuple] = None,
    spatial_units: str = "m",
    fps: Optional[float] = None,
    **kwargs,
):
    """Convenience method wrapping pandas `read_csv` and initializing metadata.

    Args:
      filepath (str): path to csv file with `x`, `y` and `time` (optional) columns
      id (str): id for trajectory
      xcol (str): name of column containing x coordinates
      ycol (str): name of column containing y coordinates
      parse_dates (Union[list,bool]): The behavior is as follows:
        - boolean. if True -> try parsing the index.
        - list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a separate date column.
      xlim (tuple): x limits (min,max) for plotting
      ylim (tuple): y limits (min,max) for plotting
      spatial_units (str): for plotting (eg, 'cm')
      fps (float): for time calculations
      **kwargs: Additional arguments for :meth:`pandas.read_csv`.

    Returns:
        traj_df (:class:`~traja.main.TrajaDataFrame`): Trajectory

    Raises:
        Exception: if `xcol` or `ycol` is given but not found in the header.
        NotImplementedError: if `filepath` does not contain "csv".

    """
    # Local import: `pd.datetime` was only an alias of `datetime.datetime`
    # and is no longer available in current pandas releases.
    from datetime import datetime

    date_parser = kwargs.pop("date_parser", None)

    # TODO: Set index to first column containing 'time'
    # Read a small sample first to infer column names and dtypes.
    df_test = pd.read_csv(
        filepath, nrows=10, parse_dates=parse_dates, infer_datetime_format=True
    )

    if xcol is not None or ycol is not None:
        if xcol not in df_test or ycol not in df_test:
            raise Exception(f"{xcol} or {ycol} not found as headers.")

    # Strip whitespace
    whitespace_cols = [c for c in df_test if " " in df_test[c].name]
    stripped_cols = {c: lambda x: x.strip() for c in whitespace_cols}
    converters = {**stripped_cols, **kwargs.pop("converters", {})}

    # Downcast to float32
    # TODO: Benchmark float32 vs float64 for very big datasets
    # `np.float` was an alias of the builtin `float`; use the builtin directly.
    float_cols = df_test.select_dtypes(include=[float]).columns
    float32_cols = {c: np.float32 for c in float_cols}

    # Convert string columns to categories
    # NOTE(review): an object-dtype column does not compare equal to `str`,
    # so this list looks like it is always empty -- confirm the intent.
    string_cols = [c for c in df_test if df_test[c].dtype == str]
    category_cols = {c: "category" for c in string_cols}
    dtype = {**float32_cols, **category_cols, **kwargs.pop("dtype", {})}

    # Parse time column if present
    time_cols = [col for col in df_test.columns if "time" in col.lower()]
    time_col = time_cols[0] if time_cols else None

    if parse_dates and not date_parser and time_col:
        # try different parsers
        format_strs = [
            "%Y-%m-%d %H:%M:%S:%f",
            "%Y-%m-%d %H:%M:%S.%f",
            "%Y-%m-%d %H:%M:%S",
        ]
        for format_str in format_strs:
            # Bind the format as a default so the parser does not depend on
            # the loop variable's later value.
            date_parser = lambda x, fmt=format_str: datetime.strptime(x, fmt)
            try:
                df_test = pd.read_csv(
                    filepath, date_parser=date_parser, nrows=10, parse_dates=[time_col]
                )
            except ValueError:
                # This format does not match; try the next one.
                pass
            if is_datetime64_any_dtype(df_test[time_col]):
                break
            elif is_timedelta64_dtype(df_test[time_col]):
                break
        else:
            # No datetime or timestamp column found
            date_parser = None

    if "csv" in filepath:
        trj = pd.read_csv(
            filepath,
            date_parser=date_parser,
            # Evaluates as `(parse_dates or [time_col]) if date_parser else False`
            parse_dates=(parse_dates or [time_col]) if date_parser else False,
            converters=converters,
            dtype=dtype,
            **kwargs,
        )

        # TODO: Replace default column renaming with user option if needed
        if time_col:
            # `rename` returns a new frame, so keep the result. Skip when a
            # "time" column already exists to avoid duplicate column labels.
            if time_col != "time" and "time" not in trj.columns:
                trj = trj.rename(columns={time_col: "time"})
        elif fps is not None:
            time = np.array([x for x in trj.index], dtype=int) / fps
            trj["time"] = time
        else:
            # leave index as int frames
            pass
        if xcol and ycol:
            trj = trj.rename(columns={xcol: "x", ycol: "y"})
    else:
        # TODO: Implement for HDF5 and .npy files.
        raise NotImplementedError("Non-csv's not yet implemented")

    trj = TrajaDataFrame(trj)

    # Set meta properties of TrajaDataFrame
    metadata = dict(
        id=id,
        xlim=xlim,
        ylim=ylim,
        spatial_units=spatial_units,
        title=kwargs.get("title", None),
        xlabel=kwargs.get("xlabel", None),
        ylabel=kwargs.get("ylabel", None),
        fps=fps,
    )
    trj.__dict__.update(**metadata)
    return trj
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
    """Apply the grouped cython function ``how`` of the given ``kind``.

    ``kind`` must be "transform" or "aggregate". ``values`` is coerced to a
    dtype the cython function accepts, reshaped to 2-D when 1-D, and handed
    to ``self._aggregate`` or ``self._transform``. tz-aware datetime input
    is processed as naive values and re-wrapped in its original type at the
    end.

    Returns a ``(result, names)`` tuple; ``names`` is produced by
    ``self._name_functions[how]`` when present, otherwise ``None``.

    Raises NotImplementedError for categorical or sparse values, for
    datetime64 / timedelta64 values combined with the unsupported ``how``
    values listed below, and for arity > 1 on values that are not 1-D.
    """
    assert kind in ["transform", "aggregate"]
    # kept so the original dtype can be restored on the result
    orig_values = values

    # can we do this operation with our cython functions
    # if not raise NotImplementedError

    # we raise NotImplemented if this is an invalid operation
    # entirely, e.g. adding datetimes

    # categoricals are only 1d, so we
    # are not setup for dim transforming
    if is_categorical_dtype(values) or is_sparse(values):
        raise NotImplementedError("{} dtype not supported".format(values.dtype))
    elif is_datetime64_any_dtype(values):
        if how in ["add", "prod", "cumsum", "cumprod"]:
            raise NotImplementedError(
                "datetime64 type does not support {} operations".format(how)
            )
    elif is_timedelta64_dtype(values):
        if how in ["prod", "cumprod"]:
            raise NotImplementedError(
                "timedelta64 type does not support {} operations".format(how)
            )

    if is_datetime64tz_dtype(values.dtype):
        # Cast to naive; we'll cast back at the end of the function
        # TODO: possible need to reshape? kludge can be avoided when
        # 2D EA is allowed.
        values = values.view("M8[ns]")

    is_datetimelike = needs_i8_conversion(values.dtype)
    is_numeric = is_numeric_dtype(values.dtype)

    # coerce `values` to a dtype the cython function lookup can handle
    if is_datetimelike:
        values = values.view("int64")
        is_numeric = True
    elif is_bool_dtype(values.dtype):
        values = ensure_float64(values)
    elif is_integer_dtype(values):
        # we use iNaT for the missing value on ints
        # so pre-convert to guard this condition
        if (values == iNaT).any():
            values = ensure_float64(values)
        else:
            values = ensure_int_or_float(values)
    elif is_numeric and not is_complex_dtype(values):
        values = ensure_float64(values)
    else:
        values = values.astype(object)

    # number of output columns per group for this `how` (defaults to 1)
    arity = self._cython_arity.get(how, 1)

    vdim = values.ndim
    swapped = False
    if vdim == 1:
        # promote to a single-column 2-D array
        values = values[:, None]
        out_shape = (self.ngroups, arity)
    else:
        if axis > 0:
            # transpose so axis 1 comes first; undone before returning
            swapped = True
            assert axis == 1, axis
            values = values.T
        if arity > 1:
            raise NotImplementedError(
                "arity of more than 1 is not supported for the 'how' argument"
            )
        out_shape = (self.ngroups,) + values.shape[1:]

    try:
        func = self._get_cython_function(kind, how, values, is_numeric)
    except NotImplementedError:
        if is_numeric:
            # retry the lookup with float64 (or complex) values
            try:
                values = ensure_float64(values)
            except TypeError:
                if lib.infer_dtype(values, skipna=False) == "complex":
                    values = values.astype(complex)
                else:
                    raise
            func = self._get_cython_function(kind, how, values, is_numeric)
        else:
            raise

    if how == "rank":
        out_dtype = "float"
    else:
        if is_numeric:
            # same kind and item size as the (coerced) input
            out_dtype = "{kind}{itemsize}".format(
                kind=values.dtype.kind, itemsize=values.dtype.itemsize
            )
        else:
            out_dtype = "object"

    labels, _, _ = self.group_info

    if kind == "aggregate":
        result = _maybe_fill(
            np.empty(out_shape, dtype=out_dtype), fill_value=np.nan
        )
        counts = np.zeros(self.ngroups, dtype=np.int64)
        result = self._aggregate(
            result,
            counts,
            values,
            labels,
            func,
            is_numeric,
            is_datetimelike,
            min_count,
        )
    elif kind == "transform":
        result = _maybe_fill(
            np.empty_like(values, dtype=out_dtype), fill_value=np.nan
        )

        # TODO: min_count
        result = self._transform(
            result, values, labels, func, is_numeric, is_datetimelike, **kwargs
        )

    if is_integer_dtype(result) and not is_datetimelike:
        # iNaT marks missing entries in integer results; convert them to NaN
        mask = result == iNaT
        if mask.any():
            result = result.astype("float64")
            result[mask] = np.nan

    if kind == "aggregate" and self._filter_empty_groups and not counts.all():
        # keep only the entries of groups with a non-zero count
        assert result.ndim != 2
        result = result[counts > 0]

    if vdim == 1 and arity == 1:
        # undo the 1-D -> 2-D promotion done above
        result = result[:, 0]

    if how in self._name_functions:
        # TODO
        names = self._name_functions[how]()
    else:
        names = None

    if swapped:
        result = result.swapaxes(0, axis)

    # restore the dtype of the original input
    if is_datetime64tz_dtype(orig_values.dtype):
        result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype)
    elif is_datetimelike and kind == "aggregate":
        result = result.astype(orig_values.dtype)

    return result, names
def resample_time(trj: TrajaDataFrame, step_time: str, new_fps: Optional[bool] = None):
    """Returns a ``TrajaDataFrame`` resampled to consistent `step_time` intervals.

    ``step_time`` should be expressed as a number-time unit combination, eg "2S" for 2 seconds and "2100L" for 2100 milliseconds.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        step_time (str): step time interval / offset string (eg, '2S' (seconds), '50L' (milliseconds), '50N' (nanoseconds))
        new_fps (bool, optional): new fps (not used by this function)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
        NotImplementedError: if `step_time` contains a fractional value, or
            if no usable time column is found.

    .. doctest::

        >>> from traja import generate, resample_time
        >>> df = generate()
        >>> resampled = resample_time(df, '50L') # 50 milliseconds
        >>> resampled.head() # doctest: +NORMALIZE_WHITESPACE
                                         x         y
        time
        1970-01-01 00:00:00.000   0.000000  0.000000
        1970-01-01 00:00:00.050   0.999571  4.293384
        1970-01-01 00:00:00.100  -1.298510  5.423373
        1970-01-01 00:00:00.150  -6.056916  4.874502
        1970-01-01 00:00:00.200 -10.347759  2.108385

    """
    time_col = _get_time_col(trj)
    if time_col == "index" and is_datetime64_any_dtype(trj.index):
        _trj = _resample_time(trj, step_time)
    elif time_col == "index" and is_timedelta64_dtype(trj.index):
        # Resampling needs a datetime index. Convert temporarily and always
        # restore the caller's index so the input is not left modified.
        original_index = trj.index
        trj.index = pd.to_datetime(trj.index)
        try:
            _trj = _resample_time(trj, step_time)
        finally:
            trj.index = original_index
        _trj.index = pd.to_timedelta(_trj.index)
    elif time_col:
        if isinstance(step_time, str) and "." in step_time:
            # Raise the explanatory message directly instead of catching it
            # and replacing it with a less specific one.
            raise NotImplementedError(
                "Fractional step time not implemented.\n"
                "For milliseconds/microseconds/nanoseconds use:\n"
                "    L       milliseconds\n"
                "    U       microseconds\n"
                "    N       nanoseconds\n"
                "    eg, step_time='2100L'"
            )
        _trj = trj.set_index(time_col)
        time_units = _trj.__dict__.get("time_units", "s")
        _trj.index = pd.to_datetime(_trj.index, unit=time_units)
        _trj = _resample_time(_trj, step_time)
    else:
        raise NotImplementedError(
            f"Time column ({time_col}) not of expected datasets type."
        )
    return _trj
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
    """Apply the grouped cython function ``how`` of the given ``kind``.

    ``kind`` must be 'transform' or 'aggregate'. ``values`` is coerced to a
    dtype the cython function accepts, reshaped to 2-D when 1-D, and handed
    to ``self._aggregate`` or ``self._transform``.

    Returns a ``(result, names)`` tuple; ``names`` is produced by
    ``self._name_functions[how]`` when present, otherwise ``None``.

    Raises NotImplementedError for categorical values, for datetime64 /
    timedelta64 values combined with the unsupported ``how`` values listed
    below, and for arity > 1 on values that are not 1-D.
    """
    assert kind in ['transform', 'aggregate']

    # can we do this operation with our cython functions
    # if not raise NotImplementedError

    # we raise NotImplemented if this is an invalid operation
    # entirely, e.g. adding datetimes

    # categoricals are only 1d, so we
    # are not setup for dim transforming
    if is_categorical_dtype(values):
        raise NotImplementedError(
            "categoricals are not support in cython ops ATM")
    elif is_datetime64_any_dtype(values):
        if how in ['add', 'prod', 'cumsum', 'cumprod']:
            raise NotImplementedError(
                "datetime64 type does not support {} "
                "operations".format(how))
    elif is_timedelta64_dtype(values):
        if how in ['prod', 'cumprod']:
            raise NotImplementedError(
                "timedelta64 type does not support {} "
                "operations".format(how))

    # number of output columns per group for this `how` (defaults to 1)
    arity = self._cython_arity.get(how, 1)

    vdim = values.ndim
    swapped = False
    if vdim == 1:
        # promote to a single-column 2-D array
        values = values[:, None]
        out_shape = (self.ngroups, arity)
    else:
        if axis > 0:
            # bring the requested axis to the front; undone before returning
            swapped = True
            values = values.swapaxes(0, axis)
        if arity > 1:
            raise NotImplementedError("arity of more than 1 is not "
                                      "supported for the 'how' argument")
        out_shape = (self.ngroups, ) + values.shape[1:]

    is_datetimelike = needs_i8_conversion(values.dtype)
    is_numeric = is_numeric_dtype(values.dtype)

    # coerce `values` to a dtype the cython function lookup can handle
    if is_datetimelike:
        values = values.view('int64')
        is_numeric = True
    elif is_bool_dtype(values.dtype):
        values = ensure_float64(values)
    elif is_integer_dtype(values):
        # we use iNaT for the missing value on ints
        # so pre-convert to guard this condition
        if (values == iNaT).any():
            values = ensure_float64(values)
        else:
            values = ensure_int64_or_float64(values)
    elif is_numeric and not is_complex_dtype(values):
        values = ensure_float64(values)
    else:
        values = values.astype(object)

    try:
        func = self._get_cython_function(kind, how, values, is_numeric)
    except NotImplementedError:
        if is_numeric:
            # retry the lookup once with float64 values
            values = ensure_float64(values)
            func = self._get_cython_function(kind, how, values, is_numeric)
        else:
            raise

    if how == 'rank':
        out_dtype = 'float'
    else:
        if is_numeric:
            # same kind and item size as the (coerced) input
            out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
        else:
            out_dtype = 'object'

    labels, _, _ = self.group_info

    if kind == 'aggregate':
        result = _maybe_fill(np.empty(out_shape, dtype=out_dtype),
                             fill_value=np.nan)
        counts = np.zeros(self.ngroups, dtype=np.int64)
        result = self._aggregate(result, counts, values, labels, func,
                                 is_numeric, is_datetimelike, min_count)
    elif kind == 'transform':
        result = _maybe_fill(np.empty_like(values, dtype=out_dtype),
                             fill_value=np.nan)

        # TODO: min_count
        result = self._transform(result, values, labels, func,
                                 is_numeric, is_datetimelike, **kwargs)

    if is_integer_dtype(result) and not is_datetimelike:
        # iNaT marks missing entries in integer results; convert them to NaN
        mask = result == iNaT
        if mask.any():
            result = result.astype('float64')
            result[mask] = np.nan

    if (kind == 'aggregate' and
            self._filter_empty_groups and not counts.all()):
        # keep only the rows of groups with a non-zero count
        if result.ndim == 2:
            try:
                result = lib.row_bool_subset(result,
                                             (counts > 0).view(np.uint8))
            except ValueError:
                result = lib.row_bool_subset_object(
                    ensure_object(result),
                    (counts > 0).view(np.uint8))
        else:
            result = result[counts > 0]

    if vdim == 1 and arity == 1:
        # undo the 1-D -> 2-D promotion done above
        result = result[:, 0]

    if how in self._name_functions:
        # TODO
        names = self._name_functions[how]()
    else:
        names = None

    if swapped:
        result = result.swapaxes(0, axis)

    return result, names
def _cython_operation(self,
                      kind: str,
                      values,
                      how: str,
                      axis,
                      min_count: int = -1,
                      **kwargs) -> Tuple[np.ndarray, Optional[List[str]]]:
    """
    Returns the values of a cython operation as a Tuple of [data, names].

    Names is only useful when dealing with 2D results, like ohlc
    (see self._name_functions).

    ``kind`` must be "transform" or "aggregate". ``values`` is coerced to a
    dtype the cython function accepts, reshaped to 2-D when 1-D, and handed
    to ``self._aggregate`` or ``self._transform``. tz-aware datetime input
    is processed as naive values and re-wrapped in its original type at the
    end.

    Raises NotImplementedError for more than 2 dimensions, for categorical
    or sparse values, for datetime64 / timedelta64 values combined with the
    unsupported ``how`` values listed below, and for arity > 1 on values
    that are not 1-D.
    """
    assert kind in ["transform", "aggregate"]
    # kept so the original dtype can be restored on the result
    orig_values = values

    if values.ndim > 2:
        raise NotImplementedError(
            "number of dimensions is currently limited to 2")
    elif values.ndim == 2:
        # Note: it is *not* the case that axis is always 0 for 1-dim values,
        # as we can have 1D ExtensionArrays that we need to treat as 2D
        assert axis == 1, axis

    # can we do this operation with our cython functions
    # if not raise NotImplementedError

    # we raise NotImplemented if this is an invalid operation
    # entirely, e.g. adding datetimes

    # categoricals are only 1d, so we
    # are not setup for dim transforming
    if is_categorical_dtype(values) or is_sparse(values):
        raise NotImplementedError(f"{values.dtype} dtype not supported")
    elif is_datetime64_any_dtype(values):
        if how in ["add", "prod", "cumsum", "cumprod"]:
            raise NotImplementedError(
                f"datetime64 type does not support {how} operations")
    elif is_timedelta64_dtype(values):
        if how in ["prod", "cumprod"]:
            raise NotImplementedError(
                f"timedelta64 type does not support {how} operations")

    if is_datetime64tz_dtype(values.dtype):
        # Cast to naive; we'll cast back at the end of the function
        # TODO: possible need to reshape? kludge can be avoided when
        # 2D EA is allowed.
        values = values.view("M8[ns]")

    is_datetimelike = needs_i8_conversion(values.dtype)
    is_numeric = is_numeric_dtype(values.dtype)

    # coerce `values` to a dtype the cython function lookup can handle
    if is_datetimelike:
        values = values.view("int64")
        is_numeric = True
    elif is_bool_dtype(values.dtype):
        values = ensure_float64(values)
    elif is_integer_dtype(values):
        # we use iNaT for the missing value on ints
        # so pre-convert to guard this condition
        if (values == iNaT).any():
            values = ensure_float64(values)
        else:
            values = ensure_int_or_float(values)
    elif is_numeric and not is_complex_dtype(values):
        values = ensure_float64(values)
    else:
        values = values.astype(object)

    # number of output columns per group for this `how` (defaults to 1)
    arity = self._cython_arity.get(how, 1)

    vdim = values.ndim
    swapped = False
    if vdim == 1:
        # promote to a single-column 2-D array
        values = values[:, None]
        out_shape = (self.ngroups, arity)
    else:
        if axis > 0:
            # transpose so axis 1 comes first; undone before returning
            swapped = True
            assert axis == 1, axis
            values = values.T
        if arity > 1:
            raise NotImplementedError(
                "arity of more than 1 is not supported for the 'how' argument"
            )
        out_shape = (self.ngroups, ) + values.shape[1:]

    # the helper may hand back converted values along with the function
    func, values = self._get_cython_func_and_vals(kind, how, values,
                                                  is_numeric)

    if how == "rank":
        out_dtype = "float"
    else:
        if is_numeric:
            # same kind and item size as the (coerced) input
            out_dtype = f"{values.dtype.kind}{values.dtype.itemsize}"
        else:
            out_dtype = "object"

    codes, _, _ = self.group_info

    if kind == "aggregate":
        result = _maybe_fill(np.empty(out_shape, dtype=out_dtype),
                             fill_value=np.nan)
        counts = np.zeros(self.ngroups, dtype=np.int64)
        result = self._aggregate(result, counts, values, codes, func,
                                 is_datetimelike, min_count)
    elif kind == "transform":
        result = _maybe_fill(np.empty_like(values, dtype=out_dtype),
                             fill_value=np.nan)

        # TODO: min_count
        result = self._transform(result, values, codes, func,
                                 is_datetimelike, **kwargs)

    if is_integer_dtype(result) and not is_datetimelike:
        # iNaT marks missing entries in integer results; convert them to NaN
        mask = result == iNaT
        if mask.any():
            result = result.astype("float64")
            result[mask] = np.nan
    elif (how == "add" and is_integer_dtype(orig_values.dtype)
          and is_extension_array_dtype(orig_values.dtype)):
        # We need this to ensure that Series[Int64Dtype].resample().sum()
        # remains int64 dtype.
        # Two options for avoiding this special case
        # 1. mask-aware ops and avoid casting to float with NaN above
        # 2. specify the result dtype when calling this method
        result = result.astype("int64")

    if kind == "aggregate" and self._filter_empty_groups and not counts.all(
    ):
        # keep only the entries of groups with a non-zero count
        assert result.ndim != 2
        result = result[counts > 0]

    if vdim == 1 and arity == 1:
        # undo the 1-D -> 2-D promotion done above
        result = result[:, 0]

    names: Optional[List[str]] = self._name_functions.get(how, None)

    if swapped:
        result = result.swapaxes(0, axis)

    # restore the dtype of the original input
    if is_datetime64tz_dtype(orig_values.dtype):
        result = type(orig_values)(result.astype(np.int64),
                                   dtype=orig_values.dtype)
    elif is_datetimelike and kind == "aggregate":
        result = result.astype(orig_values.dtype)

    return result, names