Exemple #1
0
def test_truncate() -> None:
    """Check ``dt.truncate`` on half-hourly timestamps with a one-hour interval.

    The interval is given once as a string and once as a ``timedelta``; both
    forms must yield the same truncated values.
    """
    begin = datetime(2001, 1, 1)
    end = datetime(2001, 1, 2)
    half_hour = timedelta(minutes=30)
    one_hour = timedelta(hours=1)

    ms_dates = pl.date_range(begin, end, half_hour, name="dates", time_unit="ms")
    ns_dates = pl.date_range(begin, end, half_hour, name="dates", time_unit="ns")

    # (position, expected value) pairs sampled from both ends of the range
    checkpoints = [
        (0, begin),
        (1, begin),
        (2, begin + one_hour),
        (3, begin + one_hour),
        (-3, end - one_hour),
        (-2, end - one_hour),
        (-1, end),
    ]

    # the interval may be passed as a string or as a timedelta
    truncated_series = [ms_dates.dt.truncate("1h"), ns_dates.dt.truncate(one_hour)]
    for truncated in truncated_series:
        for position, expected in checkpoints:
            assert truncated.dt[position] == expected
Exemple #2
0
def test_date_offset() -> None:
    """Check ``dt.offset_by`` with a positive and a compound negative offset."""
    yearly = pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")
    shifted = pl.DataFrame({"dates": yearly}).with_columns(
        [
            pl.col("dates").dt.offset_by("1y").alias("date_plus_1y"),
            pl.col("dates").dt.offset_by("-1y2mo").alias("date_min"),
        ]
    )

    # every shifted value must still fall on the first day of its month
    for column in ("date_plus_1y", "date_min"):
        assert (shifted[column].dt.day() == 1).all()

    # 1 Jan 2000..2020 moved back by 1 year and 2 months -> 1 Nov 1998..2018
    expected_min = [datetime(year, 11, 1, 0, 0) for year in range(1998, 2019)]
    assert shifted["date_min"].to_list() == expected_min
Exemple #3
0
def test_date_range() -> None:
    """Check ``pl.date_range`` with a ``timedelta`` step and explicit time units."""
    step = timedelta(days=1, hours=12)
    long_range = pl.date_range(datetime(1985, 1, 1), datetime(2015, 7, 1), step)

    assert len(long_range) == 7426
    # spot-check the first few values and the last one
    expected_at = {
        0: datetime(1985, 1, 1),
        1: datetime(1985, 1, 2, 12, 0),
        2: datetime(1985, 1, 4, 0, 0),
        -1: datetime(2015, 6, 30, 12, 0),
    }
    for position, expected in expected_at.items():
        assert long_range.dt[position] == expected

    day_start, day_end = datetime(2020, 1, 1), datetime(2020, 1, 2)
    for time_unit in ("ns", "ms"):
        two_hourly = pl.date_range(day_start, day_end, "2h", time_unit=time_unit)
        assert two_hourly.time_unit == time_unit
        assert two_hourly.shape == (13,)
        assert two_hourly.dt[0] == day_start
        assert two_hourly.dt[-1] == day_end
Exemple #4
0
def test_date_range() -> None:
    """Check ``pl.date_range`` with ``date`` bounds and several intervals.

    Covers a ``timedelta`` step between two dates, every unit in
    ``DTYPE_TEMPORAL_UNITS`` with mixed ``datetime``/``date`` bounds, a
    monthly interval between two dates, and a sub-day interval between two
    dates.
    """
    result = pl.date_range(
        date(1985, 1, 1), date(2015, 7, 1), timedelta(days=1, hours=12)
    )
    assert len(result) == 7426
    assert result.dt[0] == datetime(1985, 1, 1)
    assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
    assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
    assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)

    for tu in DTYPE_TEMPORAL_UNITS:
        rng = pl.date_range(datetime(2020, 1, 1), date(2020, 1, 2), "2h", time_unit=tu)
        assert rng.time_unit == tu
        assert rng.shape == (13,)
        assert rng.dt[0] == datetime(2020, 1, 1)
        assert rng.dt[-1] == datetime(2020, 1, 2)

    # if low/high are both date, the range is also date _iff_ the granularity is >= 1d
    result = pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
    assert result.to_list() == [date(2022, 1, 1), date(2022, 2, 1), date(2022, 3, 1)]
    assert result.name == "drange"

    result = pl.date_range(date(2022, 1, 1), date(2022, 1, 2), "1h30m")
    # Compare as a plain list: ``Series == list`` is an element-wise comparison
    # that returns a boolean Series, so asserting on it directly would not
    # verify the individual values.
    assert result.to_list() == [
        datetime(2022, 1, 1, 0, 0),
        datetime(2022, 1, 1, 1, 30),
        datetime(2022, 1, 1, 3, 0),
        datetime(2022, 1, 1, 4, 30),
        datetime(2022, 1, 1, 6, 0),
        datetime(2022, 1, 1, 7, 30),
        datetime(2022, 1, 1, 9, 0),
        datetime(2022, 1, 1, 10, 30),
        datetime(2022, 1, 1, 12, 0),
        datetime(2022, 1, 1, 13, 30),
        datetime(2022, 1, 1, 15, 0),
        datetime(2022, 1, 1, 16, 30),
        datetime(2022, 1, 1, 18, 0),
        datetime(2022, 1, 1, 19, 30),
        datetime(2022, 1, 1, 21, 0),
        datetime(2022, 1, 1, 22, 30),
        datetime(2022, 1, 2, 0, 0),
    ]
Exemple #5
0
def test_datetime_units() -> None:
    """Check that excluding ``pl.Datetime(unit)`` leaves out that unit's column.

    The frame has one column per time unit, each named after its unit.
    """
    first, last = datetime(2020, 1, 1), datetime(2020, 5, 1)
    df = pl.DataFrame(
        {
            unit_name: pl.date_range(first, last, "1mo", time_unit=unit_name)
            for unit_name in ("ns", "us", "ms")
        }
    )
    all_columns = set(df.columns)

    for unit in DTYPE_TEMPORAL_UNITS:
        allowed = all_columns - {unit}
        remaining = df.select([pl.all().exclude(pl.Datetime(unit))]).columns
        # nothing outside the allowed set may survive the exclusion
        assert set(remaining).issubset(allowed)
Exemple #6
0
def test_groupby_rolling_by_() -> None:
    """Check ``groupby_rolling`` with a ``by`` column on a cross-joined frame.

    The result on a frame sorted only by the index column must match the
    result on a frame sorted by group and index column.
    """
    groups = pl.DataFrame({"group": pl.arange(0, 3, eager=True)})
    days = pl.DataFrame(
        {"datetime": pl.date_range(datetime(2020, 1, 1), datetime(2020, 1, 5), "1d")}
    )
    df = groups.join(days, how="cross")

    def rolling_count(frame: pl.DataFrame) -> pl.DataFrame:
        # 3-day rolling row count per group
        rolling = frame.groupby_rolling(
            index_column="datetime", by="group", period="3d"
        )
        return rolling.agg([pl.count().alias("count")])

    out = rolling_count(df.sort("datetime"))
    expected = rolling_count(df.sort(["group", "datetime"]))
    assert out.sort(["group", "datetime"]).frame_equal(expected)

    # each of the three groups sees the same five days and the same counts
    five_days = [datetime(2020, 1, day, 0, 0) for day in range(1, 6)]
    assert out.to_dict(False) == {
        "group": [0] * 5 + [1] * 5 + [2] * 5,
        "datetime": five_days * 3,
        "count": [1, 2, 3, 3, 3] * 3,
    }
Exemple #7
0
def test_to_numpy() -> None:
    """Check ``Series.to_numpy`` for date, datetime and duration Series."""
    date_series = pl.Series("date", [123543, 283478, 1243]).cast(pl.Date)
    datetime_series = pl.Series(
        "datetime",
        [datetime(2021, 1, 2, 3, 4, 5), datetime(2021, 2, 3, 4, 5, 6)],
    )
    ms_range = pl.date_range(
        datetime(2021, 1, 1, 0),
        datetime(2021, 1, 1, 1),
        interval="1h",
        time_unit="ms",
    )

    # the string form of the converted arrays is compared against fixed text
    date_repr = str(date_series.to_numpy())
    assert date_repr == "['2308-04-02' '2746-02-20' '1973-05-28']"

    datetime_repr = str(datetime_series.to_numpy()[:2])
    assert (
        datetime_repr == "['2021-01-02T03:04:05.000000' '2021-02-03T04:05:06.000000']"
    )

    ms_repr = str(ms_range.to_numpy()[:2])
    assert ms_repr == "['2021-01-01T00:00:00.000' '2021-01-01T01:00:00.000']"

    # durations are compared against a timedelta64[ns] array
    durations = pl.Series([timedelta(hours=1), timedelta(hours=-2)])
    expected_ns = np.array(
        [3_600_000_000_000, -7_200_000_000_000], dtype="timedelta64[ns]"
    )
    assert (durations.to_numpy() == expected_ns).all()
Exemple #8
0
def test_unique_counts_on_dates() -> None:
    """Check ``unique_counts`` on datetime columns cast to other units and to Date."""
    monthly = pl.date_range(datetime(2020, 1, 1), datetime(2020, 3, 1), "1mo")
    frame = pl.DataFrame({"dt_ns": monthly}).with_columns(
        [
            pl.col("dt_ns").dt.cast_time_unit("us").alias("dt_us"),
            pl.col("dt_ns").dt.cast_time_unit("ms").alias("dt_ms"),
            pl.col("dt_ns").cast(pl.Date).alias("date"),
        ]
    )

    totals = frame.select(pl.all().unique_counts().sum()).to_dict(False)

    # the summed unique counts must be 3 in every column
    column_names = ("dt_ns", "dt_us", "dt_ms", "date")
    assert totals == {name: [3] for name in column_names}
Exemple #9
0
def test_groupby_rolling_negative_offset_3914() -> None:
    """Check ``groupby_rolling`` with a negative ``offset``.

    Exercised once on a daily datetime index and once on an integer index.
    """
    daily = pl.DataFrame(
        {"datetime": pl.date_range(datetime(2020, 1, 1), datetime(2020, 1, 5), "1d")}
    )
    day_counts = daily.groupby_rolling(
        index_column="datetime", period="2d", offset="-4d"
    ).agg(pl.count().alias("count"))
    assert day_counts["count"].to_list() == [0, 0, 1, 2, 2]

    ints = pl.DataFrame({"ints": range(0, 20)})
    int_windows = ints.groupby_rolling(
        index_column="ints", period="2i", offset="-5i"
    ).agg([pl.col("ints").alias("matches")])

    # three empty windows, one single-element window, then consecutive pairs
    expected = [[], [], [], [0]] + [[i, i + 1] for i in range(16)]
    assert int_windows["matches"].to_list() == expected
Exemple #10
0
def test_quarter() -> None:
    """Check ``dt.quarter`` on the first day of each month of 2022."""
    months = pl.date_range(datetime(2022, 1, 1), datetime(2022, 12, 1), "1mo")
    # three consecutive months per quarter: 1, 1, 1, 2, 2, 2, ...
    expected = [quarter for quarter in range(1, 5) for _ in range(3)]
    assert months.dt.quarter().to_list() == expected