예제 #1
0
def test_get_data_names():
    """Check the dataset names ``DataLoader.get_data_names`` reports per folder."""
    loader = DataLoader()

    # The top-level 'data' folder has no .csv file directly inside it,
    # so an empty list is expected.
    root_dir = loader.get_data_home()
    assert loader.get_data_names(data_path=root_dir) == []

    # The 'daily' sub-directory; order is irrelevant, so compare as sets.
    expected_daily = {
        "daily_temperature_australia",
        "daily_demand_order",
        "daily_female_births",
        "daily_istanbul_stock",
        "daily_peyton_manning",
    }
    daily_dir = loader.get_data_home(data_sub_dir="daily")
    daily_names = loader.get_data_names(data_path=daily_dir)
    assert set(daily_names) == expected_daily
예제 #2
0
def test_get_df():
    """Check that ``DataLoader.get_df`` loads known datasets and rejects unknown names.

    Covers a daily dataset, an hourly dataset, and the ``ValueError`` expected
    when the requested data name has no matching file in the data directory.
    """
    import re  # local import: only needed to escape the expected error message

    dl = DataLoader()
    # Daily data
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="daily")
    df = dl.get_df(data_path=data_path, data_name="daily_peyton_manning")
    assert list(df.columns) == ["ts", "y"]
    assert df.shape == (2905, 2)

    # Hourly data
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="hourly")
    df = dl.get_df(data_path=data_path, data_name="hourly_parking")
    assert list(df.columns) == [
        "SystemCodeNumber", "Capacity", "Occupancy", "LastUpdated"
    ]
    assert df.shape == (35717, 4)

    # Error due to wrong file name
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="daily")
    file_path = os.path.join(data_path, "parking.csv")
    file_names = dl.get_data_names(data_path=data_path)
    # ``match`` is treated as a regular expression. Escape the whole expected
    # message so that characters in the interpolated paths (e.g. backslashes
    # on Windows, '+', '(') and the list brackets are matched literally
    # instead of being interpreted as regex syntax.
    expected_message = (
        f"Given file path '{file_path}' is not found. Available datasets "
        f"in data directory '{data_path}' are {file_names}."
    )
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        dl.get_df(data_path=data_path, data_name="parking")