Example #1
0
def test_supervised_data_split_sizes():
    """Check the train/test split sizes and that the split loses no rows.

    A four-row frame is wrapped in ``pymleda.SupervisedData``. The test
    expects ``test_df`` to hold a quarter of the rows and ``train_df`` the
    remaining three quarters, and expects the two subsets put back together
    to equal the input frame.
    """
    toy_data = pd.DataFrame({
        "col1": [1, 1, 1, 1],
        "col2": [2, 2, 2, 2],
        "col3": [3, 3, 3, 3],
    })

    supervised_data = pymleda.SupervisedData(toy_data,
                                             x_cols=["col1", "col2"],
                                             y_cols=["col3"])

    assert len(supervised_data.test_df) == 0.25 * len(toy_data)
    assert len(supervised_data.train_df) == 0.75 * len(toy_data)

    # Reconstruct the original dataframe from the two subsets as a second check
    reconstructed = pd.concat(
        [supervised_data.test_df, supervised_data.train_df])

    # assert_frame_equal raises AssertionError itself on a mismatch, so it is
    # called directly instead of being wrapped in ``assert ... is None``
    # (which would be stripped, call included, under ``python -O``).
    # check_like=True ignores the order of index and columns, because the
    # concatenated frame lists the test rows before the train rows.
    pd.testing.assert_frame_equal(
        reconstructed,
        toy_data,
        check_like=True,
    )
Example #2
0
def test_supervised_data_invalid_input():
    """Check that ``SupervisedData`` rejects an invalid value in any argument.

    Each of the three arguments (the data, ``x_cols``, ``y_cols``) is
    replaced in turn by the integer ``1`` while the other two stay valid;
    every such construction is expected to raise.
    """
    frame = pd.DataFrame({
        "col1": [1] * 4,
        "col2": [2] * 4,
        "col3": [3] * 4,
    })

    # One entry per call: (data, x_cols, y_cols), with exactly one bad value.
    bad_argument_sets = (
        (1, ["col1", "col2"], ["col3"]),
        (frame, 1, ["col3"]),
        (frame, ["col1", "col2"], 1),
    )

    for data, features, targets in bad_argument_sets:
        with pytest.raises(Exception):
            pymleda.SupervisedData(data, x_cols=features, y_cols=targets)
Example #3
0
def test_supervised_data_original_dataset():
    """Check that the ``data`` attribute returns the original data unchanged.

    The input frame is snapshotted before ``pymleda.SupervisedData`` is
    constructed, and ``.data`` is compared against that snapshot.
    """
    toy_data = pd.DataFrame({
        "col1": [1, 1, 1, 1],
        "col2": [2, 2, 2, 2],
        "col3": [3, 3, 3, 3],
    })

    # Take an independent copy first: if ``.data`` turns out to be the very
    # object that was passed in, comparing it with ``toy_data`` itself could
    # never detect an in-place modification.
    expected = toy_data.copy()

    supervised_data = pymleda.SupervisedData(
        toy_data, x_cols=["col1", "col2"], y_cols=["col3"])

    # assert_frame_equal raises AssertionError on a mismatch, so no
    # surrounding ``assert ... is None`` is needed.
    pd.testing.assert_frame_equal(supervised_data.data, expected)
Example #4
0
def test_supervised_data_y():
    """Check that the y portions of the data contain only the y columns.

    With ``y_cols=["col3"]``, both ``y_train`` and ``y_test`` are expected
    to expose exactly the single column ``col3``.
    """
    frame = pd.DataFrame({
        "col1": [1] * 4,
        "col2": [2] * 4,
        "col3": [3] * 4,
    })

    dataset = pymleda.SupervisedData(
        frame, x_cols=["col1", "col2"], y_cols=["col3"])

    # Look up each split by name so the train split is fetched and checked
    # before the test split is touched.
    for split_name in ("y_train", "y_test"):
        split = getattr(dataset, split_name)
        assert list(split.columns) == ["col3"]
Example #5
0
def test_autoimpute_na_3(model_df):
    """Expect an exception when ``autoimpute_na`` is given a non-DataFrame.

    The integer ``777`` is passed to ``pymleda.autoimpute_na()`` and the
    result is forwarded, together with the ``model_df`` fixture, to
    ``pymleda.SupervisedData``.
    """
    # NOTE(review): both calls sit inside the same ``raises`` block, so an
    # exception from either one satisfies the test -- confirm that
    # ``autoimpute_na`` is the call that is actually expected to fail.
    with pytest.raises(Exception):
        imputed = pymleda.autoimpute_na(777)
        pymleda.SupervisedData(imputed, model_df)