def test_random_state():
    """Check that a string ``random_state`` makes the split raise ``TypeError``."""
    df = pd.DataFrame(np.arange(1000).reshape(100, 10))
    # Setup stays outside the ``raises`` block so that only the call under
    # test can satisfy the expectation; a TypeError raised while building the
    # fixture must fail the test instead of passing it.
    params = {"X": df, "test_size": 0.2, "random_state": "hello"}
    split = Split()
    # test passes if function raises a TypeError
    with pytest.raises(TypeError):
        split.train_test_split(params=params)
def test_without_target_col():
    """Split a 100x10 frame 80/20 when ``params`` carries only ``train_df``.

    The resulting partitions are read back from ``params`` under the
    ``X_train`` and ``X_test`` keys after the call.
    """
    frame = pd.DataFrame(np.arange(1000).reshape(100, 10))
    params = {"train_df": frame, "test_size": 0.2, "random_state": 420}

    splitter = Split()
    splitter.train_test_split(params=params)

    # Expected row count per partition; every partition keeps all 10 columns.
    expected_rows = {"X_train": 80, "X_test": 20}
    for key, n_rows in expected_rows.items():
        part = params[key]
        assert part.shape[0] == n_rows
        assert part.shape[1] == 10
def test_without_target_label():
    """Check that the split raises ``ValueError`` for ``X``/``y`` params.

    ``params`` holds a 100x10 feature frame under ``X`` and a 100-element
    series under ``y``, with no other target information.
    """
    df_x = pd.DataFrame(np.arange(1000).reshape(100, 10))
    df_y = pd.Series(np.arange(100))
    # Setup stays outside the ``raises`` block so that only the call under
    # test can satisfy the expectation.
    params = {
        "X": df_x,
        "y": df_y,
        "test_size": 0.2,
        "random_state": 420,
    }
    split = Split()
    with pytest.raises(ValueError):
        split.train_test_split(params=params)
def __init__(
    self,
    train_df_path=None,
    test_df_path=None,
    steps=None,
    config_file=None,
    params=None,
    custom_reader=None,
):
    """Set up the pipeline with its fixed sequence of preprocessing steps.

    All arguments except ``steps`` are forwarded unchanged to the parent
    constructor.

    NOTE(review): the ``steps`` argument is accepted but never read; the
    fixed list below always replaces it. Presumably this is intentional
    for a preset pipeline -- confirm with the callers.
    """
    # Each stage is a bound method on a freshly constructed helper object,
    # listed in the order it is handed to the parent.
    fixed_steps = [
        Parser().parse_dataset,
        NullValuesHandler().execute,
        Encoder().encode,
        HandleOutlier().handle_outliers,
        Scaler().execute,
        SelectKBest().fit_transform,
        Split().train_test_split,
    ]
    super().__init__(
        train_df_path=train_df_path,
        test_df_path=test_df_path,
        steps=fixed_steps,
        config_file=config_file,
        params=params,
        custom_reader=custom_reader,
    )
import numpy as np
import pandas as pd
import pytest

from preprocessy.resampling import Split

# Single splitter instance shared by every test in this module.
split = Split()


class TestSplitting:
    """Tests for ``Split.train_test_split`` called with keyword arguments."""

    def test_without_target_label(self):
        """An 80/20 split of a 100x10 frame keeps all 10 columns in both parts."""
        df = pd.DataFrame(np.arange(1000).reshape(100, 10))
        train, test = split.train_test_split(X=df, test_size=0.2, random_state=420)
        assert train.shape[0] == 80
        assert train.shape[1] == 10
        assert test.shape[0] == 20
        assert test.shape[1] == 10

    def test_random_state(self):
        """A string ``random_state`` must make the split raise ``TypeError``."""
        df = pd.DataFrame(np.arange(1000).reshape(100, 10))
        # test passes if function raises a TypeError
        with pytest.raises(TypeError):
            # The result is deliberately not unpacked: unpacking a
            # non-iterable return value raises TypeError by itself, which
            # would let this test pass even if the call never validated
            # random_state.
            split.train_test_split(X=df, test_size=0.2, random_state="hello")