def test_truncate() -> None:
    """Truncating to 1h buckets floors each timestamp to the hour boundary."""
    start = datetime(2001, 1, 1)
    stop = datetime(2001, 1, 2)
    step = timedelta(minutes=30)
    one_hour = timedelta(hours=1)

    series_ms = pl.date_range(start, stop, step, name="dates", time_unit="ms")
    series_ns = pl.date_range(start, stop, step, name="dates", time_unit="ns")

    # `truncate` accepts both an interval string and a timedelta
    truncations = (
        series_ms.dt.truncate("1h"),
        series_ns.dt.truncate(timedelta(hours=1)),
    )
    for truncated in truncations:
        # first two half-hour stamps floor to `start`
        assert truncated.dt[0] == start
        assert truncated.dt[1] == start
        # next pair floors to start + 1h
        assert truncated.dt[2] == start + one_hour
        assert truncated.dt[3] == start + one_hour
        # ... and symmetrically at the end of the range
        assert truncated.dt[-3] == stop - one_hour
        assert truncated.dt[-2] == stop - one_hour
        assert truncated.dt[-1] == stop
def test_date_offset() -> None:
    """Offsetting by whole years/months keeps dates on the first of the month."""
    df = pl.DataFrame(
        {"dates": pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")}
    ).with_columns(
        [
            pl.col("dates").dt.offset_by("1y").alias("date_plus_1y"),
            pl.col("dates").dt.offset_by("-1y2mo").alias("date_min"),
        ]
    )

    assert (df["date_plus_1y"].dt.day() == 1).all()
    assert (df["date_min"].dt.day() == 1).all()

    # Jan 1 of 2000..2020 shifted back 1y2mo -> Nov 1 of 1998..2018
    expected = [datetime(year, 11, 1) for year in range(1998, 2019)]
    assert df["date_min"].to_list() == expected
def test_date_range_datetime_bounds() -> None:
    """Check `date_range` with datetime bounds and a mixed day/hour interval.

    NOTE(review): this function was previously named ``test_date_range``,
    colliding with a later definition of the same name in this file; the
    second ``def`` shadowed this one, so pytest never collected or ran it.
    Renamed so both tests execute.
    """
    result = pl.date_range(
        datetime(1985, 1, 1), datetime(2015, 7, 1), timedelta(days=1, hours=12)
    )
    assert len(result) == 7426
    assert result.dt[0] == datetime(1985, 1, 1)
    assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
    assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
    assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)

    # the requested time unit is propagated to the resulting series
    for tu in ["ns", "ms"]:
        rng = pl.date_range(
            datetime(2020, 1, 1), datetime(2020, 1, 2), "2h", time_unit=tu
        )
        assert rng.time_unit == tu
        assert rng.shape == (13,)
        assert rng.dt[0] == datetime(2020, 1, 1)
        assert rng.dt[-1] == datetime(2020, 1, 2)
def test_date_range() -> None:
    """`date_range` with date bounds; output type depends on the granularity."""
    result = pl.date_range(
        date(1985, 1, 1), date(2015, 7, 1), timedelta(days=1, hours=12)
    )
    assert len(result) == 7426
    assert result.dt[0] == datetime(1985, 1, 1)
    assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
    assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
    assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)

    # every temporal unit produces the same range, tagged with that unit
    for tu in DTYPE_TEMPORAL_UNITS:
        rng = pl.date_range(datetime(2020, 1, 1), date(2020, 1, 2), "2h", time_unit=tu)
        assert rng.time_unit == tu
        assert rng.shape == (13,)
        assert rng.dt[0] == datetime(2020, 1, 1)
        assert rng.dt[-1] == datetime(2020, 1, 2)

    # if low/high are both date, range is also be date _iif_ the granularity is >= 1d
    result = pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
    assert result.to_list() == [date(2022, 1, 1), date(2022, 2, 1), date(2022, 3, 1)]
    assert result.name == "drange"

    # sub-day granularity promotes date bounds to datetimes
    result = pl.date_range(date(2022, 1, 1), date(2022, 1, 2), "1h30m")
    expected = [
        datetime(2022, 1, 1) + i * timedelta(minutes=90) for i in range(17)
    ]
    assert result == expected
def test_datetime_units() -> None:
    """Excluding a Datetime dtype by unit drops exactly that column."""
    df = pl.DataFrame(
        {
            tu: pl.date_range(
                datetime(2020, 1, 1), datetime(2020, 5, 1), "1mo", time_unit=tu
            )
            for tu in ("ns", "us", "ms")
        }
    )
    names = set(df.columns)
    for unit in DTYPE_TEMPORAL_UNITS:
        others = names - {unit}
        remaining = set(df.select([pl.all().exclude(pl.Datetime(unit))]).columns)
        # excluding Datetime(unit) leaves exactly the other two columns
        assert remaining == others
def test_groupby_rolling_by_() -> None:
    """Rolling groupby with `by=` is insensitive to prior row order."""
    groups = pl.DataFrame({"group": pl.arange(0, 3, eager=True)})
    dates = pl.DataFrame(
        {
            "datetime": pl.date_range(datetime(2020, 1, 1), datetime(2020, 1, 5), "1d"),
        }
    )
    df = groups.join(dates, how="cross")

    out = (
        df.sort("datetime")
        .groupby_rolling(index_column="datetime", by="group", period="3d")
        .agg([pl.count().alias("count")])
    )
    expected = (
        df.sort(["group", "datetime"])
        .groupby_rolling(index_column="datetime", by="group", period="3d")
        .agg([pl.count().alias("count")])
    )
    # sorting only by datetime vs. by (group, datetime) must give the same result
    assert out.sort(["group", "datetime"]).frame_equal(expected)

    # per group: windows of 3 days ramp up 1, 2 then saturate at 3
    assert out.to_dict(False) == {
        "group": [g for g in range(3) for _ in range(5)],
        "datetime": [datetime(2020, 1, d) for _ in range(3) for d in range(1, 6)],
        "count": [1, 2, 3, 3, 3] * 3,
    }
def test_to_numpy() -> None:
    """Temporal series convert to numpy datetime64/timedelta64 arrays."""
    dates = pl.Series("date", [123543, 283478, 1243]).cast(pl.Date)
    assert str(dates.to_numpy()) == "['2308-04-02' '2746-02-20' '1973-05-28']"

    datetimes = pl.Series(
        "datetime", [datetime(2021, 1, 2, 3, 4, 5), datetime(2021, 2, 3, 4, 5, 6)]
    )
    assert (
        str(datetimes.to_numpy()[:2])
        == "['2021-01-02T03:04:05.000000' '2021-02-03T04:05:06.000000']"
    )

    # ms time unit is reflected in the numpy datetime64 precision
    hourly = pl.date_range(
        datetime(2021, 1, 1, 0), datetime(2021, 1, 1, 1), interval="1h", time_unit="ms"
    )
    assert (
        str(hourly.to_numpy()[:2])
        == "['2021-01-01T00:00:00.000' '2021-01-01T01:00:00.000']"
    )

    durations = pl.Series([timedelta(hours=1), timedelta(hours=-2)])
    expected = np.array([3_600_000_000_000, -7_200_000_000_000], dtype="timedelta64[ns]")
    assert (durations.to_numpy() == expected).all()
def test_unique_counts_on_dates() -> None:
    """`unique_counts` works across all temporal dtypes (ns/us/ms/date)."""
    df = pl.DataFrame(
        {
            "dt_ns": pl.date_range(datetime(2020, 1, 1), datetime(2020, 3, 1), "1mo"),
        }
    ).with_columns(
        [
            pl.col("dt_ns").dt.cast_time_unit("us").alias("dt_us"),
            pl.col("dt_ns").dt.cast_time_unit("ms").alias("dt_ms"),
            pl.col("dt_ns").cast(pl.Date).alias("date"),
        ]
    )
    # three distinct values in every column, each occurring once -> sum == 3
    counts = df.select(pl.all().unique_counts().sum()).to_dict(False)
    assert counts == {
        "dt_ns": [3],
        "dt_us": [3],
        "dt_ms": [3],
        "date": [3],
    }
def test_groupby_rolling_negative_offset_3914() -> None:
    """Regression test for #3914: negative offsets in rolling groupby."""
    dates_df = pl.DataFrame(
        {
            "datetime": pl.date_range(datetime(2020, 1, 1), datetime(2020, 1, 5), "1d"),
        }
    )
    counts = (
        dates_df.groupby_rolling(index_column="datetime", period="2d", offset="-4d")
        .agg(pl.count().alias("count"))["count"]
        .to_list()
    )
    assert counts == [0, 0, 1, 2, 2]

    ints_df = pl.DataFrame({"ints": range(0, 20)})
    matches = (
        ints_df.groupby_rolling(index_column="ints", period="2i", offset="-5i")
        .agg([pl.col("ints").alias("matches")])["matches"]
        .to_list()
    )
    # window [i-5, i-3): empty until i=3, then a ramp-up, then sliding pairs
    expected = [[], [], [], [0], [0, 1]] + [[i, i + 1] for i in range(1, 16)]
    assert matches == expected
def test_quarter() -> None:
    """Each month of 2022 maps to its calendar quarter (1-4)."""
    months = pl.date_range(datetime(2022, 1, 1), datetime(2022, 12, 1), "1mo")
    # month m (1-based) belongs to quarter (m - 1) // 3 + 1
    expected = [(m - 1) // 3 + 1 for m in range(1, 13)]
    assert months.dt.quarter().to_list() == expected