Ejemplo n.º 1
0
def test_from_gluonts_list_dataset():
    """Check construction of ``TimeSeriesDataFrame`` from GluonTS ``ListDataset`` objects.

    Three scenarios are covered:

    1. A dataset of random series (with the last ``prediction_length`` steps
       trimmed off) is accepted without raising.
    2. A dataset built from ``SAMPLE_ITERABLE`` converts to a frame equal to
       ``SAMPLE_TS_DATAFRAME`` (dtypes are not compared).
    3. An empty dataset makes the constructor raise ``ValueError``.
    """
    freq = "D"
    prediction_length = 24
    n_series, n_steps = 10, 100  # number of time series, number of timesteps
    random_targets = np.random.normal(size=(n_series, n_steps))
    # NOTE(review): the ``freq`` argument of pd.Timestamp was deprecated in
    # pandas 1.4 and removed in 2.0 -- confirm the supported pandas range.
    first_timestamp = pd.Timestamp("01-01-2019", freq=freq)  # type: ignore

    # Every series shares the same start and drops its final
    # ``prediction_length`` observations.
    trimmed_entries = [
        {"target": series, "start": first_timestamp}
        for series in random_targets[:, :-prediction_length]
    ]
    random_list_dataset = ListDataset(trimmed_entries, freq=freq)
    # Smoke check only: the call must simply not raise.
    TimeSeriesDataFrame(random_list_dataset)

    sample_list_dataset = ListDataset(SAMPLE_ITERABLE, freq=freq)
    converted = TimeSeriesDataFrame(sample_list_dataset)
    pd.testing.assert_frame_equal(
        converted,
        SAMPLE_TS_DATAFRAME,
        check_dtype=False,
    )

    # The empty dataset is built outside the ``raises`` context so that only
    # the TimeSeriesDataFrame constructor is expected to raise.
    no_entries_dataset = ListDataset([], freq=freq)
    with pytest.raises(ValueError):
        TimeSeriesDataFrame(no_entries_dataset)
Ejemplo n.º 2
0
def test_validate_data_frame():
    """Check that column names decide whether a plain frame is accepted.

    A frame concatenated from unnamed Series is rejected with ``ValueError``;
    once its columns are renamed to ``item_id``, ``timestamp`` and ``target``
    the same frame is accepted by the constructor.
    """
    columns_in_order = [
        pd.Series(np.repeat(ITEM_IDS, 3)),
        pd.Series(np.tile(DATETIME_INDEX, 3)),
        pd.Series(TARGETS),
    ]
    frame = pd.concat(columns_in_order, axis=1)

    # No column names have been assigned yet, so validation must fail.
    with pytest.raises(ValueError):
        TimeSeriesDataFrame(frame)

    # With the expected column names the constructor must not raise.
    frame.columns = ["item_id", "timestamp", "target"]
    TimeSeriesDataFrame(frame)
Ejemplo n.º 3
0
def test_when_dataset_constructed_with_irregular_timestamps_then_constructor_raises(
        list_of_timestamps):
    """Check that ``from_data_frame`` raises for the given timestamp lists.

    ``list_of_timestamps`` is an iterable of per-item timestamp collections;
    the position of each collection is used as its item id and every
    timestamp receives a random target value. The constructor is expected to
    raise ``ValueError`` with a message matching "uniformly sampled".
    """
    rows = [
        (item_position, pd.Timestamp(raw_time), np.random.rand())
        for item_position, item_times in enumerate(list_of_timestamps)
        for raw_time in item_times
    ]
    long_frame = pd.DataFrame(rows, columns=[ITEMID, TIMESTAMP, "target"])

    with pytest.raises(ValueError, match="uniformly sampled"):
        TimeSeriesDataFrame.from_data_frame(long_frame)
Ejemplo n.º 4
0
def test_validate_multi_index_data_frame():
    """Check which inputs the constructor accepts or rejects.

    ``SAMPLE_TS_DATAFRAME`` is accepted as-is. A bare 2-D numpy array and a
    frame indexed only by item id are both rejected with ``ValueError``.
    """
    # Known-good input: must not raise.
    TimeSeriesDataFrame(SAMPLE_TS_DATAFRAME)

    item_ids = (1, 2, 3, 4)
    target = list(range(4))

    # Case 1: a plain array whose columns are (item id, target).
    with pytest.raises(ValueError):
        TimeSeriesDataFrame(np.array([item_ids, target]).T, freq="D")

    # Case 2: a frame whose index holds item ids only.
    target_series = pd.Series(target, name="target", index=item_ids)
    item_indexed_frame = target_series.to_frame()
    with pytest.raises(ValueError):
        TimeSeriesDataFrame(item_indexed_frame, freq="D")
Ejemplo n.º 5
0
def test_from_iterable():
    """Check construction of ``TimeSeriesDataFrame`` from plain iterables.

    ``SAMPLE_ITERABLE`` must convert to a frame equal to
    ``SAMPLE_TS_DATAFRAME`` (dtypes included), while each malformed iterable
    below must make the constructor raise ``ValueError``.
    """
    converted = TimeSeriesDataFrame(SAMPLE_ITERABLE)
    pd.testing.assert_frame_equal(converted, SAMPLE_TS_DATAFRAME, check_dtype=True)

    rejected_inputs = (
        # No entries at all.
        [],
        # Entry without a "start" key.
        [{"target": [0, 1, 2]}],
        # Entry whose "start" timestamp is created without a freq argument.
        [{
            "target": [0, 1, 2],
            "start": pd.Timestamp("01-01-2019"),  # type: ignore
        }],
    )
    for bad_iterable in rejected_inputs:
        with pytest.raises(ValueError):
            TimeSeriesDataFrame(bad_iterable)
Ejemplo n.º 6
0
def test_when_dataset_constructed_from_dataframe_without_freq_then_freq_is_inferred(
        timestamps, expected_freq):
    """Check that ``from_data_frame`` exposes the expected ``freq``.

    A three-row frame for a single item (id ``0``) is built from
    ``timestamps``; no frequency is passed to ``from_data_frame``, and the
    resulting ``freq`` attribute must equal ``expected_freq``.
    """
    parsed_times = [pd.Timestamp(raw_time) for raw_time in timestamps]
    long_frame = pd.DataFrame({
        "item_id": [0, 0, 0],
        "target": [1, 2, 3],
        "timestamp": parsed_times,
    })

    result = TimeSeriesDataFrame.from_data_frame(long_frame)
    assert result.freq == expected_freq
Ejemplo n.º 7
0
def test_when_dataframe_class_rename_called_then_output_correct(
        input_df, inplace):
    """Check ``TimeSeriesDataFrame.rename`` for both ``inplace`` settings.

    The "target" column is renamed to "mytarget". When ``inplace`` is truthy
    the assertions run against ``input_df`` itself, otherwise against the
    value returned by ``rename``.
    """
    returned = TimeSeriesDataFrame.rename(
        input_df,
        columns={"target": "mytarget"},
        inplace=inplace,
    )
    checked_df = input_df if inplace else returned

    assert isinstance(checked_df, TimeSeriesDataFrame)
    column_labels = checked_df.columns
    assert "mytarget" in column_labels
    assert "target" not in column_labels
    if inplace:
        # NOTE(review): checked_df *is* input_df in this branch, so this
        # identity check looks trivially true -- confirm the intent.
        assert checked_df._data is input_df._data
Ejemplo n.º 8
0
def test_when_dataset_sliced_by_step_then_output_times_and_values_correct(
        input_iterable, input_slice, expected_times, expected_values):
    """Check the rows returned by ``slice_by_timestep``.

    Args:
        input_iterable: iterable passed to ``from_iterable_dataset``.
        input_slice: slice handed to ``slice_by_timestep``.
        expected_times: timestamps expected in the second index level of the
            sliced frame, in row order.
        expected_values: expected contents of the "target" column.
    """
    df = TimeSeriesDataFrame.from_iterable_dataset(input_iterable)
    dfv = df.slice_by_timestep(input_slice)

    assert isinstance(dfv, TimeSeriesDataFrame)

    # Check the row count unconditionally. The timestamp check below iterates
    # over the *result*, so a result with too few rows (including an empty
    # one) would otherwise pass it vacuously; np.allclose can likewise
    # broadcast a length-1 expectation. This also covers the case of an empty
    # ``expected_times``.
    assert len(dfv) == len(expected_times)

    assert np.allclose(dfv["target"], expected_values)

    # Each index entry is indexed at position 1 to obtain its timestamp.
    assert all(ixval[1] == pd.Timestamp(expected_times[i])
               for i, ixval in enumerate(dfv.index.values))  # type: ignore
Ejemplo n.º 9
0
def test_when_dataset_constructed_via_constructor_with_freq_then_freq_is_inferred(
        start_time, freq):
    """Check that the frame built from a ``ListDataset`` reports its ``freq``.

    Three identical entries (target ``[1, 2, 3]``, same start) are wrapped in
    a ``ListDataset`` created with ``freq``; the resulting
    ``TimeSeriesDataFrame.freq`` must equal that same ``freq``.
    """
    entries = []
    for _ in range(3):
        # NOTE(review): the ``freq`` argument of pd.Timestamp was deprecated
        # in pandas 1.4 and removed in 2.0 -- confirm the supported range.
        start = pd.Timestamp(start_time, freq=freq)  # type: ignore
        entries.append({"target": [1, 2, 3], "start": start})

    item_list = ListDataset(entries, freq=freq)
    result = TimeSeriesDataFrame(item_list)

    assert result.freq == freq
Ejemplo n.º 10
0
def test_when_dataframe_class_copy_called_then_output_correct(input_df):
    """Check that a deep ``TimeSeriesDataFrame.copy`` keeps the type but not ``_data``."""
    duplicate = TimeSeriesDataFrame.copy(input_df, deep=True)

    # The copy keeps the subclass type ...
    assert isinstance(duplicate, TimeSeriesDataFrame)
    # ... and its ``_data`` attribute is a different object from the source's.
    assert duplicate._data is not input_df._data
Ejemplo n.º 11
0
def _build_ts_dataframe(item_ids, datetime_index, target):
    """Build a single-column ``TimeSeriesDataFrame`` over items x timestamps.

    The index is the Cartesian product of ``item_ids`` and ``datetime_index``
    with levels named "item_id" and "timestamp"; ``target`` supplies the
    values of the "target" column.
    """
    product_index = pd.MultiIndex.from_product(
        [item_ids, datetime_index],
        names=["item_id", "timestamp"],
    )
    target_series = pd.Series(target, name="target", index=product_index)
    return TimeSeriesDataFrame(target_series.to_frame())
Ejemplo n.º 12
0
def test_from_data_frame():
    """Check that ``SAMPLE_DATAFRAME`` converts to ``SAMPLE_TS_DATAFRAME`` (dtypes included)."""
    converted = TimeSeriesDataFrame(SAMPLE_DATAFRAME)
    pd.testing.assert_frame_equal(
        converted,
        SAMPLE_TS_DATAFRAME,
        check_dtype=True,
    )