Beispiel #1
0
    def test_is_datetime_dtypes(self):

        ts = pd.date_range('20130101', periods=3)
        tsa = pd.date_range('20130101', periods=3, tz='US/Eastern')

        assert is_datetime64_dtype('datetime64')
        assert is_datetime64_dtype('datetime64[ns]')
        assert is_datetime64_dtype(ts)
        assert not is_datetime64_dtype(tsa)

        assert not is_datetime64_ns_dtype('datetime64')
        assert is_datetime64_ns_dtype('datetime64[ns]')
        assert is_datetime64_ns_dtype(ts)
        assert is_datetime64_ns_dtype(tsa)

        assert is_datetime64_any_dtype('datetime64')
        assert is_datetime64_any_dtype('datetime64[ns]')
        assert is_datetime64_any_dtype(ts)
        assert is_datetime64_any_dtype(tsa)

        assert not is_datetime64tz_dtype('datetime64')
        assert not is_datetime64tz_dtype('datetime64[ns]')
        assert not is_datetime64tz_dtype(ts)
        assert is_datetime64tz_dtype(tsa)

        for tz in ['US/Eastern', 'UTC']:
            dtype = 'datetime64[ns, {}]'.format(tz)
            assert not is_datetime64_dtype(dtype)
            assert is_datetime64tz_dtype(dtype)
            assert is_datetime64_ns_dtype(dtype)
            assert is_datetime64_any_dtype(dtype)
Beispiel #2
0
 def test_compat(self):
     """``self.dtype`` and its string alias pass the tz/any/ns checks but not
     the naive ``is_datetime64_dtype`` check."""
     candidates = (self.dtype, 'datetime64[ns, US/Eastern]')

     positive_checks = (
         is_datetime64tz_dtype,
         is_datetime64_any_dtype,
         is_datetime64_ns_dtype,
     )
     for check in positive_checks:
         for candidate in candidates:
             self.assertTrue(check(candidate))

     for candidate in candidates:
         self.assertFalse(is_datetime64_dtype(candidate))
Beispiel #3
0
 def test_compat(self):
     assert is_datetime64tz_dtype(self.dtype)
     assert is_datetime64tz_dtype('datetime64[ns, US/Eastern]')
     assert is_datetime64_any_dtype(self.dtype)
     assert is_datetime64_any_dtype('datetime64[ns, US/Eastern]')
     assert is_datetime64_ns_dtype(self.dtype)
     assert is_datetime64_ns_dtype('datetime64[ns, US/Eastern]')
     assert not is_datetime64_dtype(self.dtype)
     assert not is_datetime64_dtype('datetime64[ns, US/Eastern]')
Beispiel #4
0
 def __rsub__(self, other):
     if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
         # ndarray[datetime64] cannot be subtracted from self, so
         # we need to wrap in DatetimeIndex and flip the operation
         from pandas import DatetimeIndex
         return DatetimeIndex(other) - self
     elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
           not is_datetime64_any_dtype(other)):
         # GH#19959 datetime - datetime is well-defined as timedelta,
         # but any other type - datetime is not well-defined.
         raise TypeError("cannot subtract {cls} from {typ}"
                         .format(cls=type(self).__name__,
                                 typ=type(other).__name__))
     return -(self - other)
Beispiel #5
0
 def __rsub__(self, other):
     """Reflected subtraction, i.e. evaluate ``other - self``.

     A datetime64 ``other`` with a timedelta64 ``self`` is wrapped in
     ``DatetimeArrayMixin`` (unless it already is a
     ``DatetimeLikeArrayMixin``) and the subtraction is performed from that
     side. When ``self`` is datetime-like and ``other`` carries a
     non-datetime ``dtype``, a ``TypeError`` is raised. Everything else is
     computed as ``-(self - other)``.
     """
     if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
         # A raw datetime64 ndarray cannot have ``self`` subtracted from it,
         # so wrap it and flip the operation.
         already_wrapped = isinstance(other, DatetimeLikeArrayMixin)
         if not already_wrapped:
             # Only wrap raw inputs, so a DatetimeIndex is not down-cast.
             from pandas.core.arrays import DatetimeArrayMixin
             other = DatetimeArrayMixin(other)
         return other - self

     if is_datetime64_any_dtype(self):
         if hasattr(other, 'dtype') and not is_datetime64_any_dtype(other):
             # GH#19959: datetime - datetime yields a timedelta, but
             # <anything else> - datetime has no sensible meaning.
             message = "cannot subtract {cls} from {typ}".format(
                 cls=type(self).__name__,
                 typ=type(other).__name__)
             raise TypeError(message)

     return -(self - other)
Beispiel #6
0
def test_is_datetime64_any_dtype():
    """Check ``is_datetime64_any_dtype`` on non-datetime and datetime inputs.

    Covers Python types, NumPy arrays, the ``np.datetime64`` scalar type, a
    ``DatetimeTZDtype`` instance and a ``DatetimeIndex``.
    """
    assert not com.is_datetime64_any_dtype(int)
    assert not com.is_datetime64_any_dtype(str)
    assert not com.is_datetime64_any_dtype(np.array([1, 2]))
    assert not com.is_datetime64_any_dtype(np.array(['a', 'b']))

    assert com.is_datetime64_any_dtype(np.datetime64)
    assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64))
    assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    # The index dtype carries an explicit unit: a unit-less ``datetime64``
    # dtype is not accepted by the DatetimeIndex constructor.
    assert com.is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3],
                                                        dtype="datetime64[ns]"))
Beispiel #7
0
    def astype(self, dtype, copy=True, how='start'):
        """Cast the index to ``dtype``.

        Datetime-like targets are produced via ``to_timestamp(how=how)`` and
        localized to the target dtype's ``tz`` (if it has one); every other
        target is delegated to the parent class ``astype``.
        """
        dtype = pandas_dtype(dtype)

        if not is_datetime64_any_dtype(dtype):
            # TODO: should probably raise on `how` here, so we don't ignore it.
            return super(PeriodIndex, self).astype(dtype, copy=copy)

        # 'how' is index-specific, isn't part of the EA interface.
        target_tz = getattr(dtype, 'tz', None)
        stamps = self.to_timestamp(how=how)
        return stamps.tz_localize(target_tz)
Beispiel #8
0
 def astype(self, dtype, copy=True, how='start'):
     """Cast the index to ``dtype``.

     Integer targets return ``self._int64index`` (copied when ``copy`` is
     true), datetime-like targets go through ``to_timestamp(how=how)`` and
     are localized to the dtype's ``tz``, period targets go through
     ``asfreq``; anything else is delegated to the parent class ``astype``.
     """
     dtype = pandas_dtype(dtype)

     if is_integer_dtype(dtype):
         int_index = self._int64index
         return int_index.copy() if copy else int_index

     if is_datetime64_any_dtype(dtype):
         target_tz = getattr(dtype, 'tz', None)
         stamps = self.to_timestamp(how=how)
         return stamps.tz_localize(target_tz)

     if is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)

     return super(PeriodIndex, self).astype(dtype, copy=copy)
Beispiel #9
0
    def astype(self, dtype, copy=True, how='start'):
        """Cast the index to ``dtype``.

        Datetime-like targets are produced via ``to_timestamp(how=how)`` and
        localized to the target dtype's ``tz`` (if it has one). All other
        targets cast ``self._data`` and wrap the result in an ``Index`` that
        keeps this index's name.
        """
        dtype = pandas_dtype(dtype)

        if not is_datetime64_any_dtype(dtype):
            # No special case applies: astype the underlying values.
            converted = self._data.astype(dtype, copy=copy)
            return Index(converted, name=self.name, dtype=dtype, copy=False)

        # 'how' is index-specific, isn't part of the EA interface.
        target_tz = getattr(dtype, 'tz', None)
        stamps = self.to_timestamp(how=how)
        return stamps.tz_localize(target_tz)
Beispiel #10
0
    def __setitem__(self, key, value):
        """Assign ``value`` at ``key`` by writing into copies of both bounds.

        ``value`` may be an NA scalar, a scalar interval, or something
        ``IntervalArray`` can be built from. Any other input raises
        ``TypeError``.
        """
        cast_to_float = False

        if is_scalar(value) and isna(value):
            # NA scalar: pick a representation the numpy arrays can hold.
            subtype = self.dtype.subtype
            if is_integer_dtype(subtype):
                # NaN cannot live in an integer array; convert bounds below.
                cast_to_float = True
            elif is_datetime64_any_dtype(subtype):
                # Needs a real NaT to be set directly on the numpy array.
                value = np.datetime64('NaT')
            elif is_timedelta64_dtype(subtype):
                # Needs a real NaT to be set directly on the numpy array.
                value = np.timedelta64('NaT')
            new_left = new_right = value

        elif is_interval_dtype(value) or isinstance(value, ABCInterval):
            # Scalar interval.
            self._check_closed_matches(value, name="value")
            new_left = value.left
            new_right = value.right

        else:
            # List-like of intervals.
            try:
                as_intervals = IntervalArray(value)
                new_left = as_intervals.left
                new_right = as_intervals.right
            except TypeError:
                # Wrong type: neither interval nor NA.
                msg = "'value' should be an interval type, got {} instead."
                raise TypeError(msg.format(type(value)))

        # Each bound is copied, updated on the copy and then swapped in, so
        # the stored bound is never modified in place.
        left_copy = self.left.copy(deep=True)
        if cast_to_float:
            left_copy = left_copy.astype('float')
        left_copy.values[key] = new_left
        self._left = left_copy

        right_copy = self.right.copy(deep=True)
        if cast_to_float:
            right_copy = right_copy.astype('float')
        right_copy.values[key] = new_right
        self._right = right_copy
Beispiel #11
0
    def __array__(self, dtype=None, copy=True):
        """Return a dense NumPy array of this sparse array.

        With no gaps the stored ``sp_values`` are returned as-is. Otherwise
        an array of ``self.shape`` is filled with the fill value and the
        stored values are written at their indices. When ``dtype`` is not
        given it is derived with ``np.result_type``, falling back to
        ``object`` if NumPy raises ``TypeError``.
        """
        fill = self.fill_value

        if self.sp_index.ngaps == 0:
            # Compat for na dtype and int values.
            return self.sp_values

        if dtype is None:
            # Special-case datetime64 values combined with pandas NaT:
            # pd.NaT itself can't be put in a datetime64[ns] array.
            if is_datetime64_any_dtype(self.sp_values.dtype) and fill is pd.NaT:
                fill = np.datetime64('NaT')
            # If NumPy cannot represent the combination, `np.result_type`
            # raises and we fall back to object.
            try:
                dtype = np.result_type(self.sp_values.dtype, fill)
            except TypeError:
                dtype = object

        dense = np.full(self.shape, fill, dtype=dtype)
        positions = self.sp_index.to_int_index().indices
        dense[positions] = self.sp_values
        return dense
Beispiel #12
0
def plot(
    trj: TrajaDataFrame,
    n_coords: Optional[int] = None,
    show_time: bool = False,
    accessor: Optional[traja.TrajaAccessor] = None,
    ax=None,
    **kwargs,
) -> matplotlib.collections.PathCollection:
    """Plot trajectory for single animal over period.

    Args:
      trj (:class:`traja.TrajaDataFrame`): trajectory
      n_coords (int, optional): Number of coordinates to plot
      show_time (bool): Show colormap as time
      accessor (:class:`~traja.accessor.TrajaAccessor`, optional): TrajaAccessor
        instance (not used by this function)
      ax (:class:`~matplotlib.axes.Axes`): axes for plotting; a new figure and
        axes are created when omitted
      **kwargs: ``xlim``, ``ylim``, ``title``, ``time_units``, ``fps``,
        ``figsize``, ``s`` and ``invert_yaxis`` are consumed here (after
        ``_get_after_plot_args`` has split off its own options); everything
        else is forwarded to :meth:`matplotlib.axes.Axes.scatter`

    Returns:
        collection (:class:`~matplotlib.collections.PathCollection`): collection that was plotted

    Raises:
        NotImplementedError: if the time index is neither datetime64 nor
            timedelta64, or a timedelta index is combined with an
            unsupported time unit.

    """
    import matplotlib.patches as patches
    from matplotlib.path import Path

    after_plot_args, kwargs = _get_after_plot_args(**kwargs)

    GRAY = "#999999"

    xlim = kwargs.pop("xlim", None)
    ylim = kwargs.pop("ylim", None)
    if not xlim or not ylim:
        xlim, ylim = traja.trajectory._get_xylim(trj)

    title = kwargs.pop("title", None)
    time_units = kwargs.pop("time_units", "s")
    fps = kwargs.pop("fps", None)
    figsize = kwargs.pop("figsize", None)
    marker_size = kwargs.pop("s", 1)
    # Consume this before the remaining kwargs are forwarded to scatter,
    # which does not accept it.
    invert_yaxis = kwargs.pop("invert_yaxis", None)

    coords = trj[["x", "y"]]
    time_col = traja.trajectory._get_time_col(trj)

    if time_col == "index":
        is_datetime = True
    else:
        is_datetime = is_datetime64_any_dtype(trj[time_col]) if time_col else False

    # Plot all coords unless only the first `n_coords` were requested
    verts = coords.values if n_coords is None else coords.iloc[:n_coords].values
    n_coords = len(verts)

    codes = [Path.MOVETO] + [Path.LINETO] * (len(verts) - 1)
    path = Path(verts, codes)

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
        fig.canvas.draw()

    patch = patches.PathPatch(path, edgecolor=GRAY, facecolor="none", lw=3, alpha=0.3)
    ax.add_patch(patch)

    xs, ys = zip(*verts)

    if time_col == "index":
        # Position within the (time) index determines color
        colors = list(range(len(trj.index[:n_coords])))
    elif time_col:
        # Position within `time_col` determines color
        colors = list(range(len(trj[time_col].iloc[:n_coords])))
    else:
        # Frame count determines color
        colors = trj.index[:n_coords]

    if time_col:
        # TODO: Calculate fps if not in datetime
        vmin = min(colors)
        vmax = max(colors)
        if is_datetime:
            # Show timestamps without units
            time_units = ""
    else:
        # Index/frame count is our only reference
        vmin = trj.index[0]
        vmax = trj.index[n_coords - 1]
        if not show_time:
            time_units = ""
    label = f"Time ({time_units})" if time_units else ""

    collection = ax.scatter(
        xs,
        ys,
        c=colors,
        s=marker_size,
        cmap=plt.cm.viridis,
        alpha=0.7,
        vmin=vmin,
        vmax=vmax,
        **kwargs,
    )

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    if invert_yaxis:
        # Invert the axes being drawn on, which need not be the current axes
        ax.invert_yaxis()

    _label_axes(trj, ax)
    ax.set_title(title)
    ax.set_aspect("equal")

    # Number of color bar ticks
    CBAR_TICKS = 10 if n_coords > 20 else n_coords
    indices = np.linspace(0, n_coords - 1, CBAR_TICKS, endpoint=True, dtype=int)
    # Attach the colorbar to the plotted axes explicitly
    cbar = plt.colorbar(
        collection,
        ax=ax,
        fraction=0.046,
        pad=0.04,
        orientation="vertical",
        label=label,
    )

    # Get colorbar labels from time
    if time_col == "index":
        if is_datetime64_any_dtype(trj.index):
            cbar_labels = (
                trj.index[indices].strftime("%Y-%m-%d %H:%M:%S").values.astype(str)
            )
        elif is_timedelta64_dtype(trj.index):
            if time_units in ("s", "", None):
                cbar_labels = [round(x, 2) for x in trj.index[indices].total_seconds()]
            else:
                # Without labels the tick-label call below cannot proceed,
                # so fail explicitly after logging.
                logger.error("Time unit {} not yet implemented".format(time_units))
                raise NotImplementedError(
                    "Time unit {} not yet implemented".format(time_units)
                )
        else:
            raise NotImplementedError(
                "Indexing on {} is not yet implemented".format(type(trj.index))
            )
    elif time_col and is_timedelta64_dtype(trj[time_col]):
        cbar_labels = trj[time_col].iloc[indices].dt.total_seconds().values
        cbar_labels = ["%.2f" % number for number in cbar_labels]
    elif time_col and is_datetime:
        cbar_labels = (
            trj[time_col]
            .iloc[indices]
            .dt.strftime("%Y-%m-%d %H:%M:%S")
            .values.astype(str)
        )
    else:
        # Convert frames to time
        if time_col:
            cbar_labels = trj[time_col].iloc[indices].values
        else:
            cbar_labels = trj.index[indices].values
            cbar_labels = np.round(cbar_labels, 6)
        if fps is not None and fps > 0 and fps != 1 and show_time:
            cbar_labels = cbar_labels / fps

    cbar.set_ticks(indices)
    cbar.set_ticklabels(cbar_labels)
    plt.tight_layout()

    _process_after_plot_args(**after_plot_args)
    return collection
Beispiel #13
0
    def _cython_operation(self, kind, values, how, axis, min_count=-1,
                          **kwargs):
        """Run the grouped operation ``how`` over ``values``.

        The function is looked up with ``self._get_cython_function`` and
        executed through ``self._aggregate`` or ``self._transform``.

        Parameters
        ----------
        kind : str
            Either ``'transform'`` or ``'aggregate'`` (asserted).
        values : array-like
            Data to operate on; presumably a NumPy ndarray, since ``.ndim``,
            ``.view`` and ``.swapaxes`` are used on it -- TODO confirm.
        how : str
            Name of the operation; also used to look up the arity in
            ``self._cython_arity`` and result names in
            ``self._name_functions``.
        axis : int
            Only consulted when ``values`` is not 1-D; a positive axis is
            swapped to the front before computing and swapped back afterwards.
        min_count : int, default -1
            Forwarded to ``self._aggregate`` (not used on the transform path).
        **kwargs
            Forwarded to ``self._transform`` only.

        Returns
        -------
        tuple
            ``(result, names)`` where ``names`` is
            ``self._name_functions[how]()`` when ``how`` is registered there
            and ``None`` otherwise.

        Raises
        ------
        NotImplementedError
            For categorical values, for datetime64 values with
            add/prod/cumsum/cumprod, for timedelta64 values with
            prod/cumprod, for arity > 1 on non-1-D input, or when no
            function can be found for non-numeric values.
        """
        assert kind in ['transform', 'aggregate']

        # can we do this operation with our cython functions
        # if not raise NotImplementedError

        # we raise NotImplemented if this is an invalid operation
        # entirely, e.g. adding datetimes

        # categoricals are only 1d, so we
        # are not setup for dim transforming
        if is_categorical_dtype(values):
            raise NotImplementedError(
                "categoricals are not support in cython ops ATM")
        elif is_datetime64_any_dtype(values):
            if how in ['add', 'prod', 'cumsum', 'cumprod']:
                raise NotImplementedError(
                    "datetime64 type does not support {} "
                    "operations".format(how))
        elif is_timedelta64_dtype(values):
            if how in ['prod', 'cumprod']:
                raise NotImplementedError(
                    "timedelta64 type does not support {} "
                    "operations".format(how))

        arity = self._cython_arity.get(how, 1)

        vdim = values.ndim
        swapped = False
        if vdim == 1:
            # promote 1-D input to a single column; undone before returning
            # when arity == 1
            values = values[:, None]
            out_shape = (self.ngroups, arity)
        else:
            if axis > 0:
                swapped = True
                values = values.swapaxes(0, axis)
            if arity > 1:
                raise NotImplementedError("arity of more than 1 is not "
                                          "supported for the 'how' argument")
            out_shape = (self.ngroups,) + values.shape[1:]

        is_datetimelike = needs_i8_conversion(values.dtype)
        is_numeric = is_numeric_dtype(values.dtype)

        # coerce values to a dtype the function lookup below is given
        if is_datetimelike:
            values = values.view('int64')
            is_numeric = True
        elif is_bool_dtype(values.dtype):
            values = ensure_float64(values)
        elif is_integer_dtype(values):
            # we use iNaT for the missing value on ints
            # so pre-convert to guard this condition
            if (values == iNaT).any():
                values = ensure_float64(values)
            else:
                values = ensure_int64_or_float64(values)
        elif is_numeric and not is_complex_dtype(values):
            values = ensure_float64(values)
        else:
            values = values.astype(object)

        try:
            func = self._get_cython_function(
                kind, how, values, is_numeric)
        except NotImplementedError:
            # retry once with float64 values for numeric data
            if is_numeric:
                values = ensure_float64(values)
                func = self._get_cython_function(
                    kind, how, values, is_numeric)
            else:
                raise

        # output dtype: float for rank, otherwise same kind/itemsize as the
        # (coerced) values for numeric data and object for everything else
        if how == 'rank':
            out_dtype = 'float'
        else:
            if is_numeric:
                out_dtype = '{kind}{itemsize}'.format(
                    kind=values.dtype.kind, itemsize=values.dtype.itemsize)
            else:
                out_dtype = 'object'

        labels, _, _ = self.group_info

        if kind == 'aggregate':
            result = _maybe_fill(np.empty(out_shape, dtype=out_dtype),
                                 fill_value=np.nan)
            counts = np.zeros(self.ngroups, dtype=np.int64)
            result = self._aggregate(
                result, counts, values, labels, func, is_numeric,
                is_datetimelike, min_count)
        elif kind == 'transform':
            result = _maybe_fill(np.empty_like(values, dtype=out_dtype),
                                 fill_value=np.nan)

            # TODO: min_count
            result = self._transform(
                result, values, labels, func, is_numeric, is_datetimelike,
                **kwargs)

        # integer results holding iNaT are converted to float64 with NaN in
        # those positions (skipped for datetimelike data)
        if is_integer_dtype(result) and not is_datetimelike:
            mask = result == iNaT
            if mask.any():
                result = result.astype('float64')
                result[mask] = np.nan

        # drop rows for groups with a zero count when filtering is enabled
        if (kind == 'aggregate' and
                self._filter_empty_groups and not counts.all()):
            if result.ndim == 2:
                try:
                    result = lib.row_bool_subset(
                        result, (counts > 0).view(np.uint8))
                except ValueError:
                    result = lib.row_bool_subset_object(
                        ensure_object(result),
                        (counts > 0).view(np.uint8))
            else:
                result = result[counts > 0]

        if vdim == 1 and arity == 1:
            result = result[:, 0]

        if how in self._name_functions:
            # TODO
            names = self._name_functions[how]()
        else:
            names = None

        if swapped:
            result = result.swapaxes(0, axis)

        return result, names
Beispiel #14
0
def read_file(
    filepath: str,
    id: Optional[str] = None,
    xcol: Optional[str] = None,
    ycol: Optional[str] = None,
    parse_dates: Union[str, bool] = False,
    xlim: Optional[tuple] = None,
    ylim: Optional[tuple] = None,
    spatial_units: str = "m",
    fps: Optional[float] = None,
    **kwargs,
):
    """Convenience method wrapping pandas `read_csv` and initializing metadata.

    The first 10 rows are read once up front to discover column names and
    dtypes (and, if requested, a working datetime format) before the full
    file is loaded.

    Args:
      filepath (str): path to csv file with `x`, `y` and `time` (optional) columns
      id (str): id for trajectory
      xcol (str): name of column containing x coordinates
      ycol (str): name of column containing y coordinates
      parse_dates (Union[list,bool]): The behavior is as follows:
                                    - boolean. if True -> try parsing the index.
                                    - list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a
                                    separate date column.
      xlim (tuple): x limits (min,max) for plotting
      ylim (tuple): y limits (min,max) for plotting
      spatial_units (str): for plotting (eg, 'cm')
      fps (float): for time calculations
      **kwargs: Additional arguments for :meth:`pandas.read_csv`.
        ``date_parser``, ``converters`` and ``dtype`` are merged with the
        values derived here.

    Returns:
        traj_df (:class:`~traja.main.TrajaDataFrame`): Trajectory

    Raises:
        Exception: if `xcol` or `ycol` is given but either is not a column header.
        NotImplementedError: if `filepath` does not contain ``"csv"``.

    """
    # Local import: the module's import block is not touched by this function.
    from datetime import datetime

    date_parser = kwargs.pop("date_parser", None)

    # TODO: Set index to first column containing 'time'
    df_test = pd.read_csv(
        filepath, nrows=10, parse_dates=parse_dates, infer_datetime_format=True
    )

    if xcol is not None or ycol is not None:
        if xcol not in df_test or ycol not in df_test:
            raise Exception(f"{xcol} or {ycol} not found as headers.")

    # Strip whitespace from the values of columns whose name contains a space
    whitespace_cols = [c for c in df_test if " " in df_test[c].name]
    stripped_cols = {c: lambda x: x.strip() for c in whitespace_cols}
    converters = {**stripped_cols, **kwargs.pop("converters", {})}

    # Downcast to float32 # TODO: Benchmark float32 vs float64 for very big datasets
    # (builtin `float` is what the removed `np.float` alias pointed to)
    float_cols = df_test.select_dtypes(include=[float]).columns
    float32_cols = {c: np.float32 for c in float_cols}

    # Convert string columns to categories
    # NOTE(review): object-dtype columns do not appear to compare equal to
    # `str`, so this may never select anything -- confirm intent.
    string_cols = [c for c in df_test if df_test[c].dtype == str]
    category_cols = {c: "category" for c in string_cols}
    dtype = {**float32_cols, **category_cols, **kwargs.pop("dtype", {})}

    # Parse time column if present
    time_cols = [col for col in df_test.columns if "time" in col.lower()]
    time_col = time_cols[0] if time_cols else None

    if parse_dates and not date_parser and time_col:
        # Try different parsers and keep the first one that yields a
        # datetime/timedelta column.
        format_strs = [
            "%Y-%m-%d %H:%M:%S:%f",
            "%Y-%m-%d %H:%M:%S.%f",
            "%Y-%m-%d %H:%M:%S",
        ]
        for format_str in format_strs:
            # Bind the format now so the parser does not depend on the loop
            # variable's later value.
            candidate = lambda x, fmt=format_str: datetime.strptime(x, fmt)
            try:
                df_probe = pd.read_csv(
                    filepath,
                    date_parser=candidate,
                    nrows=10,
                    parse_dates=[time_col],
                )
            except ValueError:
                # This format does not match; do not judge it by the result
                # of an earlier read.
                continue
            if is_datetime64_any_dtype(df_probe[time_col]) or is_timedelta64_dtype(
                df_probe[time_col]
            ):
                date_parser = candidate
                df_test = df_probe
                break
        # If no format matched, `date_parser` stays None.

    if "csv" not in filepath:
        # TODO: Implement for HDF5 and .npy files.
        raise NotImplementedError("Non-csv's not yet implemented")

    trj = pd.read_csv(
        filepath,
        date_parser=date_parser,
        parse_dates=(parse_dates or [time_col]) if date_parser else False,
        converters=converters,
        dtype=dtype,
        **kwargs,
    )

    # TODO: Replace default column renaming with user option if needed
    if time_col:
        # `rename` returns a new frame, so the result must be kept
        trj = trj.rename(columns={time_col: "time"})
    elif fps is not None:
        trj["time"] = np.array(trj.index, dtype=int) / fps
    # else: leave index as int frames

    if xcol and ycol:
        trj = trj.rename(columns={xcol: "x", ycol: "y"})

    trj = TrajaDataFrame(trj)

    # Set meta properties of TrajaDataFrame
    metadata = dict(
        id=id,
        xlim=xlim,
        ylim=ylim,
        spatial_units=spatial_units,
        title=kwargs.get("title", None),
        xlabel=kwargs.get("xlabel", None),
        ylabel=kwargs.get("ylabel", None),
        fps=fps,
    )
    trj.__dict__.update(**metadata)
    return trj
Beispiel #15
0
    def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
        """Run the grouped operation ``how`` over ``values``.

        The function is looked up with ``self._get_cython_function`` and
        executed through ``self._aggregate`` or ``self._transform``.
        tz-aware datetime input is viewed as naive for the computation and
        re-wrapped in its original type and dtype before returning.

        Parameters
        ----------
        kind : str
            Either ``"transform"`` or ``"aggregate"`` (asserted).
        values : array-like
            Data to operate on; presumably an ndarray or a tz-aware datetime
            array, since ``.ndim``, ``.view`` and ``.T`` are used on it --
            TODO confirm.
        how : str
            Name of the operation; also used to look up the arity in
            ``self._cython_arity`` and result names in
            ``self._name_functions``.
        axis : int
            Only consulted when ``values`` is not 1-D; a positive axis must
            be 1 (asserted) and the values are transposed for the computation.
        min_count : int, default -1
            Forwarded to ``self._aggregate`` (not used on the transform path).
        **kwargs
            Forwarded to ``self._transform`` only.

        Returns
        -------
        tuple
            ``(result, names)`` where ``names`` is
            ``self._name_functions[how]()`` when ``how`` is registered there
            and ``None`` otherwise.

        Raises
        ------
        NotImplementedError
            For categorical or sparse values, for datetime64 values with
            add/prod/cumsum/cumprod, for timedelta64 values with
            prod/cumprod, for arity > 1 on non-1-D input, or when no
            function can be found for non-numeric values.
        """
        assert kind in ["transform", "aggregate"]
        # kept so the original dtype (and type) can be restored at the end
        orig_values = values

        # can we do this operation with our cython functions
        # if not raise NotImplementedError

        # we raise NotImplemented if this is an invalid operation
        # entirely, e.g. adding datetimes

        # categoricals are only 1d, so we
        # are not setup for dim transforming
        if is_categorical_dtype(values) or is_sparse(values):
            raise NotImplementedError("{} dtype not supported".format(values.dtype))
        elif is_datetime64_any_dtype(values):
            if how in ["add", "prod", "cumsum", "cumprod"]:
                raise NotImplementedError(
                    "datetime64 type does not support {} operations".format(how)
                )
        elif is_timedelta64_dtype(values):
            if how in ["prod", "cumprod"]:
                raise NotImplementedError(
                    "timedelta64 type does not support {} operations".format(how)
                )

        if is_datetime64tz_dtype(values.dtype):
            # Cast to naive; we'll cast back at the end of the function
            # TODO: possible need to reshape?  kludge can be avoided when
            #  2D EA is allowed.
            values = values.view("M8[ns]")

        is_datetimelike = needs_i8_conversion(values.dtype)
        is_numeric = is_numeric_dtype(values.dtype)

        # coerce values to a dtype the function lookup below is given
        if is_datetimelike:
            values = values.view("int64")
            is_numeric = True
        elif is_bool_dtype(values.dtype):
            values = ensure_float64(values)
        elif is_integer_dtype(values):
            # we use iNaT for the missing value on ints
            # so pre-convert to guard this condition
            if (values == iNaT).any():
                values = ensure_float64(values)
            else:
                values = ensure_int_or_float(values)
        elif is_numeric and not is_complex_dtype(values):
            values = ensure_float64(values)
        else:
            values = values.astype(object)

        arity = self._cython_arity.get(how, 1)

        vdim = values.ndim
        swapped = False
        if vdim == 1:
            # promote 1-D input to a single column; undone before returning
            # when arity == 1
            values = values[:, None]
            out_shape = (self.ngroups, arity)
        else:
            if axis > 0:
                swapped = True
                assert axis == 1, axis
                values = values.T
            if arity > 1:
                raise NotImplementedError(
                    "arity of more than 1 is not supported for the 'how' argument"
                )
            out_shape = (self.ngroups,) + values.shape[1:]

        try:
            func = self._get_cython_function(kind, how, values, is_numeric)
        except NotImplementedError:
            # retry once with float64 (or complex) values for numeric data
            if is_numeric:
                try:
                    values = ensure_float64(values)
                except TypeError:
                    if lib.infer_dtype(values, skipna=False) == "complex":
                        values = values.astype(complex)
                    else:
                        raise
                func = self._get_cython_function(kind, how, values, is_numeric)
            else:
                raise

        # output dtype: float for rank, otherwise same kind/itemsize as the
        # (coerced) values for numeric data and object for everything else
        if how == "rank":
            out_dtype = "float"
        else:
            if is_numeric:
                out_dtype = "{kind}{itemsize}".format(
                    kind=values.dtype.kind, itemsize=values.dtype.itemsize
                )
            else:
                out_dtype = "object"

        labels, _, _ = self.group_info

        if kind == "aggregate":
            result = _maybe_fill(
                np.empty(out_shape, dtype=out_dtype), fill_value=np.nan
            )
            counts = np.zeros(self.ngroups, dtype=np.int64)
            result = self._aggregate(
                result,
                counts,
                values,
                labels,
                func,
                is_numeric,
                is_datetimelike,
                min_count,
            )
        elif kind == "transform":
            result = _maybe_fill(
                np.empty_like(values, dtype=out_dtype), fill_value=np.nan
            )

            # TODO: min_count
            result = self._transform(
                result, values, labels, func, is_numeric, is_datetimelike, **kwargs
            )

        # integer results holding iNaT are converted to float64 with NaN in
        # those positions (skipped for datetimelike data)
        if is_integer_dtype(result) and not is_datetimelike:
            mask = result == iNaT
            if mask.any():
                result = result.astype("float64")
                result[mask] = np.nan

        # drop entries for groups with a zero count when filtering is enabled
        if kind == "aggregate" and self._filter_empty_groups and not counts.all():
            assert result.ndim != 2
            result = result[counts > 0]

        if vdim == 1 and arity == 1:
            result = result[:, 0]

        if how in self._name_functions:
            # TODO
            names = self._name_functions[how]()
        else:
            names = None

        if swapped:
            result = result.swapaxes(0, axis)

        # restore the caller's dtype: re-wrap tz-aware input in its original
        # type, and cast datetimelike aggregation results back
        if is_datetime64tz_dtype(orig_values.dtype):
            result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype)
        elif is_datetimelike and kind == "aggregate":
            result = result.astype(orig_values.dtype)

        return result, names
Beispiel #16
0
def resample_time(trj: TrajaDataFrame,
                  step_time: str,
                  new_fps: Optional[bool] = None):
    """Return a ``TrajaDataFrame`` resampled to consistent ``step_time`` intervals.

    ``step_time`` should be expressed as a number-time unit combination, eg "2S"
    for 2 seconds and "2100L" for 2100 milliseconds.

    The input frame is never modified: a timedelta index is converted on a
    copy, and a time column is moved to the index via ``set_index``.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        step_time (str): step time interval / offset string (eg, '2S' (seconds), '50L' (milliseconds), '50N' (nanoseconds))
        new_fps (bool, optional): new fps. Accepted for backward compatibility;
            it is not read by this function.

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
        NotImplementedError: If the time data lives in a column and
            ``step_time`` is a string containing a fractional value
            (eg ``'0.5S'``), or if no usable time column is found.

    .. doctest::

        >>> from traja import generate, resample_time
        >>> df = generate()
        >>> resampled = resample_time(df, '50L') # 50 milliseconds
        >>> resampled.head() # doctest: +NORMALIZE_WHITESPACE
                                         x         y
        time
        1970-01-01 00:00:00.000   0.000000  0.000000
        1970-01-01 00:00:00.050   0.999571  4.293384
        1970-01-01 00:00:00.100  -1.298510  5.423373
        1970-01-01 00:00:00.150  -6.056916  4.874502
        1970-01-01 00:00:00.200 -10.347759  2.108385

    """
    time_col = _get_time_col(trj)
    if time_col == "index" and is_datetime64_any_dtype(trj.index):
        _trj = _resample_time(trj, step_time)
    elif time_col == "index" and is_timedelta64_dtype(trj.index):
        # Resampling is done on a datetime index; convert on a copy so the
        # caller's frame keeps its original timedelta index.
        _trj = trj.copy()
        _trj.index = pd.to_datetime(_trj.index)
        _trj = _resample_time(_trj, step_time)
        # Hand the result back in the same index flavour that came in.
        _trj.index = pd.to_timedelta(_trj.index)
    elif time_col:
        # Raise directly: wrapping this check in ``try/except Exception``
        # would swallow the explanatory message below.
        if isinstance(step_time, str) and "." in step_time:
            raise NotImplementedError(
                "Fractional step time not implemented.\n"
                "For milliseconds/microseconds/nanoseconds use:\n"
                "    L       milliseconds\n"
                "    U       microseconds\n"
                "    N       nanoseconds\n"
                "eg, step_time='2100L'")
        _trj = trj.set_index(time_col)
        # Fall back to seconds when the frame carries no ``time_units``.
        time_units = _trj.__dict__.get("time_units", "s")
        _trj.index = pd.to_datetime(_trj.index, unit=time_units)
        _trj = _resample_time(_trj, step_time)
    else:
        raise NotImplementedError(
            f"Time column ({time_col}) not of expected datasets type.")
    return _trj
Beispiel #17
0
    def _cython_operation(self, kind, values, how, axis, min_count=-1,
                          **kwargs):
        """
        Run the cython routine registered for ``how`` over ``values``.

        Parameters
        ----------
        kind : str
            Either 'transform' or 'aggregate'.
        values : array-like
            Data to operate on. One-dimensional input is handled as a single
            column and squeezed back afterwards when the arity is 1.
        how : str
            Operation name; used to look up the arity, the cython function
            and, when registered, the result names.
        axis : int
            For input with more than one dimension, a positive ``axis`` is
            swapped with axis 0 before the computation and swapped back
            on the result.
        min_count : int, default -1
            Forwarded to ``self._aggregate``; not used for transforms.
        **kwargs
            Forwarded to ``self._transform``.

        Returns
        -------
        tuple
            ``(result, names)`` where ``names`` is
            ``self._name_functions[how]()`` if ``how`` is registered there,
            otherwise None.

        Raises
        ------
        NotImplementedError
            For categorical input, for datetime64/timedelta64 input combined
            with an unsupported ``how``, for multi-dimensional input with an
            arity above 1, or when no cython function can be found.
        """
        assert kind in ('transform', 'aggregate')

        # Refuse dtype/operation pairs that are invalid outright (e.g. adding
        # datetimes) before any conversion work is done.
        if is_categorical_dtype(values):
            # categoricals are 1-d only, so dimension transforms do not apply
            raise NotImplementedError(
                "categoricals are not support in cython ops ATM")
        if is_datetime64_any_dtype(values):
            if how in ('add', 'prod', 'cumsum', 'cumprod'):
                message = "datetime64 type does not support {} operations"
                raise NotImplementedError(message.format(how))
        elif is_timedelta64_dtype(values) and how in ('prod', 'cumprod'):
            message = "timedelta64 type does not support {} operations"
            raise NotImplementedError(message.format(how))

        arity = self._cython_arity.get(how, 1)

        input_ndim = values.ndim
        swapped = False
        if input_ndim == 1:
            values = values[:, None]
            out_shape = (self.ngroups, arity)
        else:
            if axis > 0:
                values = values.swapaxes(0, axis)
                swapped = True
            if arity > 1:
                raise NotImplementedError(
                    "arity of more than 1 is not supported for the 'how' "
                    "argument")
            out_shape = (self.ngroups,) + values.shape[1:]

        is_datetimelike = needs_i8_conversion(values.dtype)
        is_numeric = is_numeric_dtype(values.dtype)

        # Coerce the input to a dtype the cython routines accept.
        if is_datetimelike:
            values = values.view('int64')
            is_numeric = True
        elif is_bool_dtype(values.dtype):
            values = ensure_float64(values)
        elif is_integer_dtype(values):
            # iNaT doubles as the missing-value marker for ints, so integer
            # data that already contains it is moved to float up front.
            if (values == iNaT).any():
                caster = ensure_float64
            else:
                caster = ensure_int64_or_float64
            values = caster(values)
        elif is_numeric and not is_complex_dtype(values):
            values = ensure_float64(values)
        else:
            values = values.astype(object)

        try:
            func = self._get_cython_function(kind, how, values, is_numeric)
        except NotImplementedError:
            if not is_numeric:
                raise
            # Second attempt for numeric data: look the function up again
            # after casting to float64.
            values = ensure_float64(values)
            func = self._get_cython_function(kind, how, values, is_numeric)

        if how == 'rank':
            out_dtype = 'float'
        elif is_numeric:
            out_dtype = '{}{}'.format(values.dtype.kind,
                                      values.dtype.itemsize)
        else:
            out_dtype = 'object'

        group_labels, _, _ = self.group_info

        if kind == 'aggregate':
            buffer = np.empty(out_shape, dtype=out_dtype)
            result = _maybe_fill(buffer, fill_value=np.nan)
            counts = np.zeros(self.ngroups, dtype=np.int64)
            result = self._aggregate(
                result, counts, values, group_labels, func,
                is_numeric, is_datetimelike, min_count)
        elif kind == 'transform':
            buffer = np.empty_like(values, dtype=out_dtype)
            result = _maybe_fill(buffer, fill_value=np.nan)

            # TODO: min_count
            result = self._transform(
                result, values, group_labels, func,
                is_numeric, is_datetimelike, **kwargs)

        # Integer results cannot hold NaN; switch to float where the iNaT
        # marker shows up.
        if is_integer_dtype(result) and not is_datetimelike:
            missing = result == iNaT
            if missing.any():
                result = result.astype('float64')
                result[missing] = np.nan

        if (kind == 'aggregate' and self._filter_empty_groups
                and not counts.all()):
            keep = counts > 0
            if result.ndim == 2:
                try:
                    result = lib.row_bool_subset(result,
                                                 keep.view(np.uint8))
                except ValueError:
                    result = lib.row_bool_subset_object(
                        ensure_object(result), keep.view(np.uint8))
            else:
                result = result[keep]

        if input_ndim == 1 and arity == 1:
            result = result[:, 0]

        # TODO
        names = (self._name_functions[how]()
                 if how in self._name_functions else None)

        if swapped:
            result = result.swapaxes(0, axis)

        return result, names
Beispiel #18
0
    def _cython_operation(self,
                          kind: str,
                          values,
                          how: str,
                          axis,
                          min_count: int = -1,
                          **kwargs) -> Tuple[np.ndarray, Optional[List[str]]]:
        """
        Perform the cython operation ``how`` and return ``(data, names)``.

        ``names`` comes from ``self._name_functions`` and only matters for
        2D results such as ohlc; it is None when ``how`` is not registered.

        Raises NotImplementedError for inputs with more than two dimensions,
        for categorical or sparse data, for datetime64/timedelta64 data
        combined with an unsupported ``how``, and for 2D input when the
        arity of ``how`` is above 1.
        """

        assert kind in ("transform", "aggregate")
        orig_values = values

        ndim = values.ndim
        if ndim > 2:
            raise NotImplementedError(
                "number of dimensions is currently limited to 2")
        if ndim == 2:
            # Note: axis is *not* guaranteed to be 0 for 1-dim values, since
            #  1D ExtensionArrays may need to be treated as 2D
            assert axis == 1, axis

        # Refuse dtype/operation pairs that are invalid outright (e.g. adding
        # datetimes) or unsupported by the cython functions.
        if is_categorical_dtype(values) or is_sparse(values):
            # categoricals are 1d only, so dim transforming is not set up
            raise NotImplementedError(f"{values.dtype} dtype not supported")
        if is_datetime64_any_dtype(values):
            if how in ("add", "prod", "cumsum", "cumprod"):
                raise NotImplementedError(
                    f"datetime64 type does not support {how} operations")
        elif is_timedelta64_dtype(values) and how in ("prod", "cumprod"):
            raise NotImplementedError(
                f"timedelta64 type does not support {how} operations")

        if is_datetime64tz_dtype(values.dtype):
            # Drop the timezone for the computation; it is restored just
            # before returning.
            # TODO: possible need to reshape?  kludge can be avoided when
            #  2D EA is allowed.
            values = values.view("M8[ns]")

        is_datetimelike = needs_i8_conversion(values.dtype)
        is_numeric = is_numeric_dtype(values.dtype)

        # Coerce the input to a dtype the cython routines accept.
        if is_datetimelike:
            values = values.view("int64")
            is_numeric = True
        elif is_bool_dtype(values.dtype):
            values = ensure_float64(values)
        elif is_integer_dtype(values):
            # iNaT doubles as the missing-value marker for ints, so integer
            # data that already contains it is moved to float up front.
            if (values == iNaT).any():
                caster = ensure_float64
            else:
                caster = ensure_int_or_float
            values = caster(values)
        elif is_numeric and not is_complex_dtype(values):
            values = ensure_float64(values)
        else:
            values = values.astype(object)

        arity = self._cython_arity.get(how, 1)

        input_ndim = values.ndim
        swapped = False
        if input_ndim == 1:
            values = values[:, None]
            out_shape = (self.ngroups, arity)
        else:
            if axis > 0:
                swapped = True
                assert axis == 1, axis
                values = values.T
            if arity > 1:
                raise NotImplementedError(
                    "arity of more than 1 is not supported for the 'how' argument"
                )
            out_shape = (self.ngroups,) + values.shape[1:]

        func, values = self._get_cython_func_and_vals(
            kind, how, values, is_numeric)

        if how == "rank":
            out_dtype = "float"
        elif is_numeric:
            out_dtype = f"{values.dtype.kind}{values.dtype.itemsize}"
        else:
            out_dtype = "object"

        codes, _, _ = self.group_info

        if kind == "aggregate":
            buffer = np.empty(out_shape, dtype=out_dtype)
            result = _maybe_fill(buffer, fill_value=np.nan)
            counts = np.zeros(self.ngroups, dtype=np.int64)
            result = self._aggregate(
                result, counts, values, codes, func, is_datetimelike,
                min_count)
        elif kind == "transform":
            buffer = np.empty_like(values, dtype=out_dtype)
            result = _maybe_fill(buffer, fill_value=np.nan)

            # TODO: min_count
            result = self._transform(
                result, values, codes, func, is_datetimelike, **kwargs)

        if is_integer_dtype(result) and not is_datetimelike:
            # Integer results cannot hold NaN; switch to float where the
            # iNaT marker shows up.
            missing = result == iNaT
            if missing.any():
                result = result.astype("float64")
                result[missing] = np.nan
        elif (how == "add" and is_integer_dtype(orig_values.dtype)
              and is_extension_array_dtype(orig_values.dtype)):
            # Keeps Series[Int64Dtype].resample().sum() at int64 dtype.
            # This special case could be avoided by either
            # 1. mask-aware ops that never cast to float with NaN above
            # 2. passing the result dtype in when calling this method
            result = result.astype("int64")

        if (kind == "aggregate" and self._filter_empty_groups
                and not counts.all()):
            assert result.ndim != 2
            result = result[counts > 0]

        if input_ndim == 1 and arity == 1:
            result = result[:, 0]

        names: Optional[List[str]] = self._name_functions.get(how)

        if swapped:
            result = result.swapaxes(0, axis)

        source_dtype = orig_values.dtype
        if is_datetime64tz_dtype(source_dtype):
            # Rebuild the tz-aware container from the integer result.
            result = type(orig_values)(result.astype(np.int64),
                                       dtype=source_dtype)
        elif is_datetimelike and kind == "aggregate":
            result = result.astype(source_dtype)

        return result, names