def smooth_sg(trj: TrajaDataFrame, w: int = None, p: int = 3):
    """Return a Savitzky-Golay smoothed copy of a trajectory.

    The ``x`` and ``y`` columns of a copy of ``trj`` are filtered with
    :func:`scipy.signal.savgol_filter`; the copy is then passed through
    ``fill_in_traj`` and returned. The input frame is left untouched.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        w (int): window size; when ``None`` it is derived from ``p`` as
            ``p + 3 - p % 2`` (Default value = None)
        p (int): polynomial order (Default value = 3)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Smoothed trajectory

    Raises:
        Exception: if the window size is not odd.

    Example (output depends on the generated trajectory)::

        >> df = traja.generate()
        >> traja.smooth_sg(df, w=101).head()
                   x          y  time
        0 -11.194803  12.312742  0.00
        1 -10.236337  10.613720  0.02
        2  -9.309282   8.954952  0.04
        3  -8.412910   7.335925  0.06
        4  -7.546492   5.756128  0.08

    """
    window = (p + 3 - p % 2) if w is None else w
    if window % 2 != 1:
        raise Exception(f"Invalid smoothing parameter w ({window}): n must be odd")

    smoothed = trj.copy()
    # Filter each coordinate independently with the same window and order.
    for coord in ("x", "y"):
        filtered = signal.savgol_filter(
            getattr(smoothed, coord), window_length=window, polyorder=p, axis=0
        )
        setattr(smoothed, coord, filtered)
    return fill_in_traj(smoothed)
def grid_coordinates(
    trj: TrajaDataFrame,
    bins: Union[int, tuple] = None,
    xlim: tuple = None,
    ylim: tuple = None,
    assign: bool = False,
):
    """Return the trajectory discretized into 2D lattice grid coordinates.

    Rows with a missing ``x`` or ``y`` are dropped before binning because
    NaN cannot be assigned to an integer bin.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        bins (tuple or int): bin specification, normalized by
            ``_bins_to_tuple`` into an ``(x_bins, y_bins)`` pair
        xlim (tuple): ``(min, max)`` of the x grid; when omitted the bins are
            cut from the data with :func:`pandas.cut`
        ylim (tuple): ``(min, max)`` of the y grid; same rule as ``xlim``
        assign (bool): when True, write ``xbin``/``ybin`` into ``trj`` itself
            and return it

    Returns:
        The updated ``trj`` if ``assign=True``, otherwise a new
        :class:`pandas.DataFrame` with ``xbin`` and ``ybin`` columns.

    """
    if assign:
        # The caller asked for the original frame to be updated.
        trj.dropna(subset=["x", "y"], inplace=True)
    else:
        # Only a derived frame is returned, so leave the caller's data alone.
        trj = trj.dropna(subset=["x", "y"])

    bins = _bins_to_tuple(trj, bins)

    if not xlim:
        xbin = pd.cut(trj.x, bins[0], labels=False)
    else:
        xmin, xmax = xlim
        xbin = np.digitize(trj.x, np.linspace(xmin, xmax, bins[0]))

    if not ylim:
        ybin = pd.cut(trj.y, bins[1], labels=False)
    else:
        ymin, ymax = ylim
        ybin = np.digitize(trj.y, np.linspace(ymin, ymax, bins[1]))

    if assign:
        trj["xbin"] = xbin
        trj["ybin"] = ybin
        return trj
    return pd.DataFrame({"xbin": xbin, "ybin": ybin})
def test_dataframe_to_trajadataframe(self):
    """A plain DataFrame wrapped in ``TrajaDataFrame`` becomes a ``TrajaDataFrame``."""
    n_rows = len(self.df)
    coords = {"x": range(n_rows), "y": range(n_rows)}
    # Reuse the fixture's index so the new frame lines up with ``self.df``.
    plain = pd.DataFrame(coords, index=self.df.index)
    wrapped = TrajaDataFrame(plain)

    assert isinstance(plain, pd.DataFrame)
    assert isinstance(wrapped, TrajaDataFrame)
def angles(trj: TrajaDataFrame, lag: int = 1):
    """Add ``angle``, ``heading`` and ``turn_angle`` columns to ``trj`` in place.

    Reads the ``dx``, ``dy`` and ``distance`` columns of ``trj``. ``angle`` is
    ``arccos(|dx| / distance)`` in degrees; ``heading`` is that angle signed
    and reflected according to the signs of ``dx``/``dy``; ``turn_angle`` is
    the row-to-row difference of ``heading`` wrapped back by 360 degrees when
    it is ``>= 180`` or ``< -180``.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory (modified in place)
        lag (int): only values up to 1 are accepted (Default value = 1)

    Raises:
        NotImplementedError: if ``lag`` is greater than 1.

    """
    if lag > 1:
        raise NotImplementedError("Lag must be 1.")

    step_x = trj["dx"]
    step_y = trj["dy"]
    trj["angle"] = np.rad2deg(np.arccos(np.abs(step_x) / trj["distance"]))
    acute = trj["angle"]

    # One (row selector, signed heading) pair per quadrant of the step vector.
    quadrant_rules = (
        ((step_x > 0) & (step_y >= 0), acute),
        ((step_x >= 0) & (step_y < 0), -acute),
        ((step_x < 0) & (step_y <= 0), -(180 - acute)),
        ((step_x <= 0) & (step_y > 0), 180 - acute),
    )
    for selector, signed in quadrant_rules:
        trj.loc[selector, "heading"] = signed[selector]

    trj["turn_angle"] = trj["heading"].diff()

    # Correction for 360-degree angle range
    too_high = trj.turn_angle >= 180
    trj.loc[too_high, "turn_angle"] -= 360
    too_low = trj.turn_angle < -180
    trj.loc[too_low, "turn_angle"] += 360
def _resample_time(trj: TrajaDataFrame, step_time: Union[float, int, str], errors="coerce"):
    """Resample ``trj`` to ``step_time`` intervals with order-2 spline interpolation.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory whose index is
            datetime or timedelta
        step_time: resampling rule passed to ``trj.resample``
        errors (str): ``"coerce"`` drops duplicated time indices (keeping the
            first occurrence) and retries; any other value raises instead

    Returns:
        The resampled, interpolated frame.

    Raises:
        Exception: if the index is neither datetime nor timedelta.
        ValueError: on duplicate time indices when ``errors`` is not
            ``"coerce"``, or for any other ``ValueError`` from pandas.

    """
    if not is_datetime_or_timedelta_dtype(trj.index):
        raise Exception(f"{trj.index.dtype} is not datetime or timedelta.")
    try:
        df = trj.resample(step_time).interpolate(method="spline", order=2)
    except ValueError as e:
        duplicate_axis = bool(e.args) and (
            "cannot reindex from a duplicate axis" in str(e.args[0])
        )
        if not duplicate_axis:
            # Unrelated failure: propagate it instead of falling through to
            # ``return df`` with ``df`` never assigned.
            raise
        if errors != "coerce":
            logger.error("Error: duplicate time indices")
            raise ValueError("Duplicate values in indices") from e
        logger.warning("Duplicate time indices, keeping first")
        trj = trj.loc[~trj.index.duplicated(keep="first")]
        df = (
            trj.resample(step_time)
            .bfill(limit=1)
            .interpolate(method="spline", order=2)
        )
    return df
def from_df(df: pd.DataFrame, xcol=None, ycol=None, time_col=None, **kwargs):
    """Build a :class:`traja.frame.TrajaDataFrame` from a :class:`pandas.DataFrame`.

    Args:
        df (:class:`pandas.DataFrame`): Trajectory as pandas ``DataFrame``
        xcol (str): column to convert to numeric and store as ``x``
        ycol (str): column to convert to numeric and store as ``y``
        time_col (str): column to convert to timedelta in place; the unit is
            taken from ``kwargs["time_units"]`` (default ``"s"``)
        **kwargs: stored on the returned frame as metadata attributes

    Returns:
        traj_df (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    .. doctest::

        >>> df = pd.DataFrame({'x':[0,1,2],'y':[1,2,3]})
        >>> traja.from_df(df)
           x  y
        0  0  1
        1  1  2
        2  2  3

    """
    traj_df = TrajaDataFrame(df)

    # User-named coordinate columns are only used when both are given.
    if xcol and ycol:
        for target, source in (("x", xcol), ("y", ycol)):
            traj_df[target] = pd.to_numeric(traj_df[source], errors="coerce")

    if time_col:
        units = kwargs.get("time_units", "s")
        traj_df[time_col] = pd.to_timedelta(traj_df[time_col], unit=units)
        kwargs["time_col"] = time_col

    # Make sure every declared metadata attribute exists, defaulting to None.
    for name in traj_df._metadata:
        if not hasattr(traj_df, name):
            traj_df.__dict__[name] = None

    # Remaining keyword arguments become metadata as well.
    traj_df.__dict__.update(kwargs)
    return traj_df
def smooth_sg(trj: TrajaDataFrame, w: int = None, p: int = 3):
    """Returns ``DataFrame`` of trajectory after Savitzky-Golay filtering.

    The filter is applied to a copy, so the caller's ``trj`` keeps its
    original ``x``/``y`` values.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        w (int): window size; when ``None`` it is derived from ``p`` as
            ``p + 3 - p % 2`` (Default value = None)
        p (int): polynomial order (Default value = 3)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Smoothed trajectory

    Raises:
        Exception: if the window size is not odd.

    """
    if w is None:
        w = p + 3 - p % 2

    if w % 2 != 1:
        raise Exception(f"Invalid smoothing parameter w ({w}): n must be odd")

    # Work on a copy: assigning to ``trj.x`` directly would overwrite the
    # caller's raw coordinates as a side effect.
    _trj = trj.copy()
    _trj.x = signal.savgol_filter(_trj.x, window_length=w, polyorder=p, axis=0)
    _trj.y = signal.savgol_filter(_trj.y, window_length=w, polyorder=p, axis=0)
    _trj = fill_in_traj(_trj)
    return _trj
def calc_heading(trj: TrajaDataFrame):
    """Calculate trajectory heading.

    Uses the ``angle`` column when ``trj`` has one, otherwise the result of
    ``calc_angle(trj)``. If that angle object carries a ``unit`` attribute
    equal to ``"radians"`` it is converted to degrees first. The heading is
    written into ``trj["heading"]`` and that column is returned.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Returns:
        heading (:class:`pandas.Series`): heading as a ``Series``

    .. doctest::

        >>> df = traja.TrajaDataFrame({'x':[0,1,2],'y':[1,2,3]})
        >>> traja.calc_heading(df)
        0     NaN
        1    45.0
        2    45.0
        Name: heading, dtype: float64

    """
    angle = trj.angle if _has_cols(trj, ["angle"]) else calc_angle(trj)
    if hasattr(angle, "unit") and angle.unit == "radians":
        angle = np.rad2deg(angle)

    step_x = trj.x.diff()
    step_y = trj.y.diff()

    # One (row selector, signed heading) pair per quadrant of the step vector.
    quadrant_rules = (
        ((step_x > 0) & (step_y >= 0), angle),
        ((step_x >= 0) & (step_y < 0), -angle),
        ((step_x < 0) & (step_y <= 0), -(180 - angle)),
        ((step_x <= 0) & (step_y > 0), 180 - angle),
    )
    for selector, signed in quadrant_rules:
        trj.loc[selector, "heading"] = signed[selector]
    return trj.heading
def resample_time(trj: TrajaDataFrame, step_time: str, new_fps: bool = None):
    """Returns a ``TrajaDataFrame`` resampled to consistent `step_time` intervals.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        step_time (str): step time interval (eg, '1s')
        new_fps (bool, optional): new fps (currently unused)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
        NotImplementedError: if ``step_time`` is a string containing ``"."``,
            or if no time column is found.

    .. doctest::

        >>> from traja import generate
        >>> from traja.trajectory import resample_time
        >>> df = generate()
        >>> resampled = resample_time(df, '2s')
        >>> resampled.head()
              time          x          y
        0 00:00:00  14.555071 -26.482614
        1 00:00:02  -3.582797  -6.491297
        2 00:00:04  -4.299709  26.937443
        3 00:00:06 -25.337042  42.131848
        4 00:00:08  33.069915  32.780830

    """
    time_col = _get_time_col(trj)
    # Compare by value: ``is "index"`` tests object identity, which is not
    # guaranteed for equal strings.
    if time_col == "index" and (
        is_datetime64_any_dtype(trj.index) or is_timedelta64_dtype(trj.index)
    ):
        _trj = _resample_time(trj, step_time)
    elif time_col:
        if isinstance(step_time, str) and "." in step_time:
            # Raised directly; the former try/except wrapper caught this
            # error itself and replaced it with a less specific message.
            raise NotImplementedError("Fractional step time not implemented.")
        _trj = trj.set_index(time_col)
        _trj.index = pd.to_timedelta(_trj.index, unit="s")
        _trj = _resample_time(_trj, step_time)
        _trj.reset_index(inplace=True)
    else:
        raise NotImplementedError(
            f"Time column ({time_col}) not of expected data type."
        )
    return _trj
def calc_heading(trj: TrajaDataFrame):
    """Calculate trajectory heading.

    Uses the ``angle`` column when ``trj`` has one, otherwise the result of
    ``calc_angle(trj)``. The heading is written into ``trj["heading"]`` and
    that column is returned.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Returns:
        heading (:class:`pandas.Series`): heading as a ``Series``

    .. doctest::

        >>> df = traja.TrajaDataFrame({'x':[0,1,2],'y':[1,2,3]})
        >>> traja.calc_heading(df)
        0     NaN
        1    45.0
        2    45.0
        Name: heading, dtype: float64

    """
    angle = trj.angle if _has_cols(trj, ["angle"]) else calc_angle(trj)

    step_x = trj.x.diff()
    step_y = trj.y.diff()

    # Sign/reflect the angle depending on the quadrant of the step vector.
    quadrant_rules = (
        ((step_x > 0) & (step_y >= 0), angle),
        ((step_x >= 0) & (step_y < 0), -angle),
        ((step_x < 0) & (step_y <= 0), -(180 - angle)),
        ((step_x <= 0) & (step_y > 0), 180 - angle),
    )
    for selector, signed in quadrant_rules:
        trj.loc[selector, "heading"] = signed[selector]
    return trj.heading
def polar_bar(
    trj: TrajaDataFrame,
    feature: str = "turn_angle",
    bin_size: int = 2,
    overlap: bool = True,
    ax: Optional[matplotlib.axes.Axes] = None,
    **plot_kws: str,
):
    """Plot polar bar chart.

    Rows whose displacement is not above a small threshold (0.001), or whose
    ``feature`` or displacement is null, are excluded before plotting. All
    intermediate columns are written to a private copy, so the caller's
    ``trj`` is not modified by this function.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        feature (str): Options: 'turn_angle', 'heading'
        bin_size (int): width of bins
        overlap (bool): Overlapping shows all values, if set to false is a histogram
        ax: axes forwarded to ``_polar_bar``
        **plot_kws: additional keyword arguments forwarded to ``_polar_bar``

    Returns:
        ax: the value returned by ``_polar_bar``

    Raises:
        AssertionError: if no rows remain after filtering.

    """
    DIST_THRESHOLD = 0.001

    # Get displacement
    displacement = traja.trajectory.calc_displacement(trj)

    # Copy before adding columns: assigning into ``trj`` would leak a
    # ``displacement`` column into the caller's frame.
    trj = trj.copy()
    trj["displacement"] = displacement
    # ``.copy()`` makes the filtered frame independent, so the column
    # assignments below do not target a slice of another frame.
    trj = trj.loc[trj.displacement > DIST_THRESHOLD].copy()

    if feature == "turn_angle":
        feature_series = traja.trajectory.calc_turn_angle(trj)
        trj["turn_angle"] = feature_series
        trj.turn_angle = trj.turn_angle.shift(-1)
    elif feature == "heading":
        feature_series = traja.trajectory.calc_heading(trj)
        trj[feature] = feature_series

    trj = trj[pd.notnull(trj[feature])]
    trj = trj[pd.notnull(trj.displacement)]

    assert len(trj) > 0, "Dataframe is empty after filtering, check coordinates"

    ax = _polar_bar(
        trj.displacement,
        trj[feature],
        bin_size=bin_size,
        overlap=overlap,
        ax=ax,
        **plot_kws,
    )
    return ax
def resample_time(trj: TrajaDataFrame, step_time: str, new_fps: Optional[bool] = None):
    """Returns a ``TrajaDataFrame`` resampled to consistent `step_time` intervals.

    ``step_time`` should be expressed as a number-time unit combination, eg
    "2S" for 2 seconds and "2100L" for 2100 milliseconds.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        step_time (str): step time interval / offset string (eg, '2S' (seconds),
            '50L' (milliseconds), '50N' (nanoseconds))
        new_fps (bool, optional): new fps (currently unused)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
        NotImplementedError: if ``step_time`` is a string containing ``"."``,
            or if no time column is found.

    .. doctest::

        >>> from traja import generate, resample_time
        >>> df = generate()
        >>> resampled = resample_time(df, '50L') # 50 milliseconds
        >>> resampled.head()
                                         x         y
        time
        1970-01-01 00:00:00.000   0.000000  0.000000
        1970-01-01 00:00:00.050   0.999571  4.293384
        1970-01-01 00:00:00.100  -1.298510  5.423373
        1970-01-01 00:00:00.150  -6.056916  4.874502
        1970-01-01 00:00:00.200 -10.347759  2.108385

    """
    time_col = _get_time_col(trj)
    if time_col == "index" and is_datetime64_any_dtype(trj.index):
        _trj = _resample_time(trj, step_time)
    elif time_col == "index" and is_timedelta64_dtype(trj.index):
        # Convert the index on a copy: assigning to ``trj.index`` would leave
        # the caller's frame with a datetime index after this call.
        as_datetime = trj.copy()
        as_datetime.index = pd.to_datetime(trj.index)
        _trj = _resample_time(as_datetime, step_time)
        _trj.index = pd.to_timedelta(_trj.index)
    elif time_col:
        if isinstance(step_time, str) and "." in step_time:
            # Raised directly; the former try/except wrapper caught this
            # error itself and hid the guidance below from the user.
            raise NotImplementedError(
                "Fractional step time not implemented.\n"
                "For milliseconds/microseconds/nanoseconds use:\n"
                "    L  milliseconds\n"
                "    U  microseconds\n"
                "    N  nanoseconds\n"
                "eg, step_time='2100L'"
            )
        _trj = trj.set_index(time_col)
        time_units = _trj.__dict__.get("time_units", "s")
        _trj.index = pd.to_datetime(_trj.index, unit=time_units)
        _trj = _resample_time(_trj, step_time)
    else:
        raise NotImplementedError(
            f"Time column ({time_col}) not of expected data type."
        )
    return _trj
def apply_all(trj: TrajaDataFrame, method: Callable, id_col: str, **kwargs):
    """Apply ``method`` to each group of ``trj`` that shares a value of ``id_col``.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Frame holding the trajectories
        method (Callable): function handed to ``groupby(...).apply``
        id_col (str): column whose values define the groups
        **kwargs: forwarded to ``method`` through ``apply``

    Returns:
        The result of ``trj.groupby(by=id_col).apply(method, **kwargs)``.

    """
    per_trajectory = trj.groupby(by=id_col)
    return per_trajectory.apply(method, **kwargs)
def read_file(
    filepath: str,
    id: Optional[str] = None,
    xcol: Optional[str] = None,
    ycol: Optional[str] = None,
    parse_dates: Union[str, bool] = False,
    xlim: Optional[tuple] = None,
    ylim: Optional[tuple] = None,
    spatial_units: str = "m",
    fps: Optional[float] = None,
    **kwargs,
):
    """Convenience method wrapping pandas `read_csv` and initializing metadata.

    The first 10 rows are read once as a probe to choose converters and
    dtypes (float columns are downcast to float32) and to find a time column;
    the file is then read in full.

    Args:
        filepath (str): path to csv file with `x`, `y` and `time` (optional) columns
        id (str): id for trajectory
        xcol (str): name of column containing x coordinates
        ycol (str): name of column containing y coordinates
        parse_dates (Union[list,bool]): The behavior is as follows:
            - boolean. if True -> try parsing the index.
            - list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
              each as a separate date column.
        xlim (tuple): x limits (min,max) for plotting
        ylim (tuple): y limits (min,max) for plotting
        spatial_units (str): for plotting (eg, 'cm')
        fps (float): for time calculations
        **kwargs: Additional arguments for :meth:`pandas.read_csv`. The keys
            ``title``, ``xlabel`` and ``ylabel`` are stored as metadata instead
            of being forwarded; ``date_parser``, ``converters`` and ``dtype``
            are merged with the values inferred here.

    Returns:
        traj_df (``TrajaDataFrame``): Trajectory

    Raises:
        Exception: if ``xcol``/``ycol`` is given but not found in the header.
        NotImplementedError: if ``filepath`` does not contain ``"csv"``.

    """
    from datetime import datetime

    date_parser = kwargs.pop("date_parser", None)
    # Plot metadata are not ``read_csv`` options; take them out before kwargs
    # is forwarded, otherwise ``read_csv`` rejects them as unknown keywords.
    title = kwargs.pop("title", None)
    xlabel = kwargs.pop("xlabel", None)
    ylabel = kwargs.pop("ylabel", None)

    # TODO: Set index to first column containing 'time'
    # ``infer_datetime_format`` is omitted here: it was only a speed hint
    # (irrelevant for a 10-row probe) and is deprecated in pandas 2.x.
    df_test = pd.read_csv(filepath, nrows=10, parse_dates=parse_dates)

    if xcol is not None or ycol is not None:
        if xcol not in df_test or ycol not in df_test:
            raise Exception(f"{xcol} or {ycol} not found as headers.")

    # Strip whitespace
    whitespace_cols = [c for c in df_test if " " in df_test[c].name]
    stripped_cols = {c: lambda x: x.strip() for c in whitespace_cols}
    converters = {**stripped_cols, **kwargs.pop("converters", {})}

    # Downcast to float32
    # TODO: Benchmark float32 vs float64 for very big dataset
    # ``np.float`` was an alias of the builtin ``float`` and has been removed
    # from NumPy; the builtin selects the same columns.
    float_cols = df_test.select_dtypes(include=[float]).columns
    float32_cols = {c: np.float32 for c in float_cols}

    # Convert string columns to categories
    string_cols = [c for c in df_test if df_test[c].dtype == str]
    category_cols = {c: "category" for c in string_cols}
    dtype = {**float32_cols, **category_cols, **kwargs.pop("dtype", {})}

    # Parse time column if present
    time_cols = [col for col in df_test.columns if "time" in col.lower()]
    time_col = time_cols[0] if time_cols else None

    if parse_dates and not date_parser and time_col:
        # try different parsers
        format_strs = [
            "%Y-%m-%d %H:%M:%S:%f",
            "%Y-%m-%d %H:%M:%S.%f",
            "%Y-%m-%d %H:%M:%S",
        ]
        for format_str in format_strs:
            # Bind the format as a default argument so the parser keeps the
            # format it was created for; ``pd.datetime`` no longer exists, so
            # use the stdlib class it aliased.
            date_parser = lambda x, fmt=format_str: datetime.strptime(x, fmt)
            try:
                df_test = pd.read_csv(
                    filepath, date_parser=date_parser, nrows=10, parse_dates=[time_col]
                )
            except ValueError:
                pass
            if is_datetime64_any_dtype(df_test[time_col]):
                break
            elif is_timedelta64_dtype(df_test[time_col]):
                break
        else:
            # No datetime or timestamp column found
            date_parser = None

    if "csv" in filepath:
        trj = pd.read_csv(
            filepath,
            date_parser=date_parser,
            parse_dates=parse_dates or [time_col] if date_parser else False,
            converters=converters,
            dtype=dtype,
            **kwargs,
        )

        # TODO: Replace default column renaming with user option if needed
        if time_col:
            # ``rename`` returns a new frame, so the result must be kept.
            # Skip it if a different column is already called "time".
            if time_col != "time" and "time" not in trj.columns:
                trj = trj.rename(columns={time_col: "time"})
        elif fps is not None:
            trj["time"] = np.asarray(trj.index, dtype=int) / fps
        # else: leave index as int frames

        if xcol and ycol:
            trj = trj.rename(columns={xcol: "x", ycol: "y"})
    else:
        # TODO: Implement for HDF5 and .npy files.
        raise NotImplementedError("Non-csv's not yet implemented")

    trj = TrajaDataFrame(trj)

    # Set meta properties of TrajaDataFrame
    metadata = dict(
        id=id,
        xlim=xlim,
        ylim=ylim,
        spatial_units=spatial_units,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        fps=fps,
    )
    trj.__dict__.update(**metadata)
    return trj
def read_file(filepath, **kwargs):
    """Convenience method wrapping pandas `read_csv` and initializing metadata.

    The first 10 rows are read once as a probe to choose converters and
    dtypes (float columns are downcast to float32) and to find a time column;
    the file is then read in full.

    Args:
        filepath: path of the csv file to read
        **kwargs: ``xlim``, ``ylim``, ``title``, ``spatial_units``, ``xlabel``,
            ``ylabel`` and ``fps`` are stored as metadata on the result;
            ``date_parser``, ``infer_datetime_format``, ``parse_dates``,
            ``converters`` and ``dtype`` are handled here; everything else is
            forwarded to :func:`pandas.read_csv`.

    Returns:
        A ``TrajaDataFrame`` holding the file contents and the metadata.

    Raises:
        NotImplementedError: if ``filepath`` does not contain ``"csv"``.

    """
    xlim = kwargs.pop('xlim', None)
    ylim = kwargs.pop('ylim', None)
    title = kwargs.pop('title', "Trajectory")
    spatial_units = kwargs.pop('spatial_units', 'm')
    xlabel = kwargs.pop('xlabel', f"x ({spatial_units})")
    ylabel = kwargs.pop('ylabel', f"y ({spatial_units})")
    fps = kwargs.pop('fps', None)
    # The key was misspelled 'data_parser', so a caller's 'date_parser' stayed
    # in kwargs and collided with the explicit keyword below. Accept both.
    date_parser = kwargs.pop('date_parser', kwargs.pop('data_parser', None))

    # TODO: Set index to first column containing 'time'
    # ``infer_datetime_format`` is omitted here: it was only a speed hint
    # (irrelevant for a 10-row probe) and is deprecated in pandas 2.x.
    df_test = pd.read_csv(filepath, nrows=10, parse_dates=True)

    # Strip whitespace
    whitespace_cols = [c for c in df_test if ' ' in df_test[c].name]
    stripped_cols = {c: lambda x: x.strip() for c in whitespace_cols}
    converters = {**stripped_cols, **kwargs.pop('converters', {})}

    # Downcast to float32
    # TODO: Benchmark float32 vs float64 for very big datasets
    # A dtype object is not a container, so test against its string form.
    float_cols = [c for c in df_test if 'float' in str(df_test[c].dtype)]
    float32_cols = {c: np.float32 for c in float_cols}

    # Convert string columns to categories
    string_cols = [c for c in df_test if df_test[c].dtype == str]
    category_cols = {c: 'category' for c in string_cols}
    dtype = {**float32_cols, **category_cols, **kwargs.pop('dtype', {})}

    time_cols = [col for col in df_test.columns if 'time' in col.lower()]

    if 'csv' in filepath:
        trj = pd.read_csv(
            filepath,
            date_parser=date_parser,
            infer_datetime_format=kwargs.pop('infer_datetime_format', True),
            parse_dates=kwargs.pop('parse_dates', True),
            converters=converters,
            dtype=dtype,
            **kwargs,
        )
        if time_cols:
            time_col = time_cols[0]
            # ``rename`` returns a new frame, so the result must be kept.
            # Skip it if a different column is already called "time".
            if time_col != 'time' and 'time' not in trj.columns:
                trj = trj.rename(columns={time_col: 'time'})
        elif fps is not None:
            # Without fps there is nothing to derive a time axis from.
            trj['time'] = trj.index / fps
    else:
        # TODO: Implement for HDF5 and .npy files.
        raise NotImplementedError("Non-csv's not yet implemented")

    trj = TrajaDataFrame(trj)

    # Set meta properties of TrajaDataFrame
    trj.xlim = xlim
    trj.ylim = ylim
    trj.spatial_units = spatial_units
    trj.title = title
    trj.xlabel = xlabel
    trj.ylabel = ylabel
    trj.fps = fps
    return trj
def _resample_time(trj: TrajaDataFrame, step_time: Union[float, int]):
    """Resample ``trj`` to ``step_time`` bins, averaging ``x`` and ``y`` per bin.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory whose index is
            datetime or timedelta
        step_time: resampling rule passed to ``trj.resample``

    Returns:
        The frame produced by aggregating ``x`` and ``y`` with ``np.mean``.

    Raises:
        Exception: if the index is neither datetime nor timedelta.

    """
    index_is_temporal = is_datetime_or_timedelta_dtype(trj.index)
    if not index_is_temporal:
        raise Exception(f"{trj.index.dtype} is not datetime or timedelta.")

    aggregations = dict.fromkeys(("x", "y"), np.mean)
    binned = trj.resample(step_time)
    return binned.agg(aggregations)