コード例 #1
0
def test_random_state():
    """Check that a string ``random_state`` makes ``train_test_split`` raise ``TypeError``."""
    df = pd.DataFrame(np.arange(1000).reshape(100, 10))
    params = {"X": df, "test_size": 0.2, "random_state": "hello"}
    split = Split()
    # Only the call under test sits inside the raises block, so a TypeError
    # raised while building the fixture cannot make the test pass by accident.
    with pytest.raises(TypeError):
        split.train_test_split(params=params)
コード例 #2
0
ファイル: test_split.py プロジェクト: RiyaGupta99/preprocessy
def test_without_target_col():
    """Split a 100x10 frame passed as ``train_df`` and check the result shapes.

    With ``test_size`` 0.2 the test expects ``params`` to hold an 80-row
    ``X_train`` and a 20-row ``X_test`` after the call, each with 10 columns.
    """
    frame = pd.DataFrame(np.arange(1000).reshape(100, 10))
    params = {"train_df": frame, "test_size": 0.2, "random_state": 420}
    splitter = Split()
    splitter.train_test_split(params=params)

    # The results are read back from ``params`` after the call.
    for key, expected_rows in (("X_train", 80), ("X_test", 20)):
        assert params[key].shape[0] == expected_rows
        assert params[key].shape[1] == 10
コード例 #3
0
def test_without_target_label():
    """Check that passing ``X`` and ``y`` with these params raises ``ValueError``.

    NOTE(review): going by the test name, the error is presumably triggered
    by the absence of a target-label entry in ``params`` -- confirm against
    ``Split.train_test_split``.
    """
    df_x = pd.DataFrame(np.arange(1000).reshape(100, 10))
    df_y = pd.Series(np.arange(100))
    params = {
        "X": df_x,
        "y": df_y,
        "test_size": 0.2,
        "random_state": 420,
    }
    split = Split()
    # Only the call under test sits inside the raises block, so a ValueError
    # raised while building the fixture cannot make the test pass by accident.
    with pytest.raises(ValueError):
        split.train_test_split(params=params)
コード例 #4
0
 def __init__(
     self,
     train_df_path=None,
     test_df_path=None,
     steps=None,
     config_file=None,
     params=None,
     custom_reader=None,
 ):
     """Initialise the pipeline with a fixed, built-in sequence of steps.

     Every argument except ``steps`` is forwarded by keyword to the parent
     ``__init__``. Whatever the caller passes as ``steps`` is discarded and
     the built-in sequence assembled below is forwarded in its place.
     """
     # NOTE(review): ``steps`` is accepted but never read -- confirm that
     # ignoring a caller-supplied value is intended.
     default_steps = [
         Parser().parse_dataset,
         NullValuesHandler().execute,
         Encoder().encode,
         HandleOutlier().handle_outliers,
         Scaler().execute,
         SelectKBest().fit_transform,
         Split().train_test_split,
     ]
     super().__init__(
         steps=default_steps,
         train_df_path=train_df_path,
         test_df_path=test_df_path,
         config_file=config_file,
         params=params,
         custom_reader=custom_reader,
     )
コード例 #5
0
import numpy as np
import pandas as pd
import pytest
from preprocessy.resampling import Split

# Module-level Split instance reused by the TestSplitting methods below.
split = Split()


class TestSplitting:
    """Tests for ``split.train_test_split`` called with keyword arguments."""

    def test_without_target_label(self):
        """Split a 100x10 frame with ``test_size`` 0.2 and check both shapes."""
        df = pd.DataFrame(np.arange(1000).reshape(100, 10))
        train, test = split.train_test_split(X=df,
                                             test_size=0.2,
                                             random_state=420)
        assert train.shape[0] == 80
        assert train.shape[1] == 10
        assert test.shape[0] == 20
        assert test.shape[1] == 10

    def test_random_state(self):
        """Check that a string ``random_state`` raises ``TypeError``."""
        df = pd.DataFrame(np.arange(1000).reshape(100, 10))
        # The result is deliberately not unpacked: unpacking a non-iterable
        # return value raises TypeError on its own, which would satisfy
        # pytest.raises even if ``random_state`` were never validated.
        with pytest.raises(TypeError):
            split.train_test_split(X=df,
                                   test_size=0.2,
                                   random_state="hello")