def test_to_episodic_format_with_long_time_gap_is_identical(self):
    """Check that a very large ``time_gaps`` round-trips the rossi frame.

    The episodic output, once ``stop`` is copied back into ``week`` and the
    ``start``/``stop`` columns are removed, is expected to equal the input
    frame (column order is ignored).
    """
    original = load_rossi()
    original["id"] = np.arange(original.shape[0])

    episodic = utils.to_episodic_format(
        original,
        duration_col="week",
        event_col="arrest",
        id_col="id",
        time_gaps=1000.0,
    )

    # Cast with the source column's own dtype: using astype(int) would fail
    # on Windows because int32 and int64 are used as dtype.
    week_dtype = original["week"].dtype
    episodic["week"] = episodic["stop"].astype(week_dtype)

    # Remove the interval columns so only the original columns remain.
    for interval_col in ("start", "stop"):
        del episodic[interval_col]

    assert_frame_equal(episodic, original, check_like=True)
def test_to_episodic_format_uses_custom_index_as_id(self):
    """Check that, with no id column given, index labels end up in ``id``."""
    wide = pd.DataFrame({"T": [1, 3], "E": [1, 0]}, index=["A", "B"])

    episodic = utils.to_episodic_format(wide, "T", "E")

    # Expected: one row labelled "A" (T=1) followed by three labelled "B" (T=3).
    expected_ids = ["A", "B", "B", "B"]
    observed_ids = episodic["id"].tolist()
    assert observed_ids == expected_ids
def test_to_episodic_format_adds_id_col(self):
    """Check that an ``id`` column is present when the input has none."""
    wide = pd.DataFrame({"T": [1, 3], "E": [1, 0]})

    episodic = utils.to_episodic_format(wide, "T", "E")

    output_columns = episodic.columns
    assert "id" in output_columns
def test_to_episodic_format_handles_floating_durations_and_preserves_events(self):
    """Check that the last row per id keeps the original event flag.

    Uses non-integer durations with ``time_gaps=2.0``.
    """
    wide = pd.DataFrame({"T": [0.1, 3.5], "E": [1, 0], "id": [1, 2]})

    episodic = utils.to_episodic_format(wide, "T", "E", id_col="id", time_gaps=2.0)
    ordered = episodic.sort_values(["id", "stop"])

    # After sorting by stop time, the last row of each id is its final episode.
    final_episode = ordered.groupby("id").last()
    assert final_episode["E"].tolist() == [1, 0]
def test_to_episodic_format_handles_floating_durations_with_time_gaps(self):
    """Check the ``stop`` values for float durations with ``time_gaps=2.0``."""
    wide = pd.DataFrame({"T": [0.1, 3.5], "E": [1, 1], "id": [1, 2]})

    episodic = utils.to_episodic_format(wide, "T", "E", id_col="id", time_gaps=2.0)
    ordered = episodic.sort_values(["id", "stop"])

    # Expected: id 1 stops at its duration 0.1; id 2 stops at 2, then at 3.5.
    expected_stops = [0.1, 2, 3.5]
    assert ordered["stop"].tolist() == expected_stops
def test_to_episodic_format_handles_floating_durations(self):
    """Check row count and ``stop`` values for float durations.

    ``time_gaps`` is not passed, so the function's default applies.
    """
    wide = pd.DataFrame({"T": [0.1, 3.5], "E": [1, 1], "id": [1, 2]})

    episodic = utils.to_episodic_format(wide, "T", "E", id_col="id")
    ordered = episodic.sort_values(["id", "stop"])

    # Expected: one row for id 1 plus four rows for id 2.
    row_count = ordered.shape[0]
    assert row_count == 1 + 4

    expected_stops = [0.1, 1, 2, 3, 3.5]
    assert ordered["stop"].tolist() == expected_stops