def test_coldrop_non_str_lbl():
    """Check that ColDrop removes a column addressed by a non-string label."""
    frame = _test_df2()
    assert 2 in frame.columns

    dropper = ColDrop(2)
    remaining = dropper.apply(frame).columns

    # only the requested label goes away; the other labels must survive
    assert 2 not in remaining
    assert 'num1' in remaining
    assert False in remaining
def test_coldrop_lambda():
    """Check that ColDrop accepts a callable that selects columns to drop."""
    frame = _test_df()
    for label in ('num1', 'num2'):
        assert label in frame.columns

    # the callable is given each column and inspects its name
    dropper = ColDrop(lambda series: 'num' in series.name)
    remaining = dropper.apply(frame).columns

    for label in ('num1', 'num2'):
        assert label not in remaining
    assert 'char' in remaining
def test_coldrop_multi_col():
    """Check that ColDrop removes every column in a list of labels."""
    to_drop = ['num1', 'num2']
    frame = _test_df()
    for label in to_drop:
        assert label in frame.columns

    dropper = ColDrop(['num1', 'num2'])
    remaining = dropper.apply(frame).columns

    for label in to_drop:
        assert label not in remaining
    assert 'char' in remaining
def test_fitonly_w_coldrop():
    """Check a FitOnly-wrapped ColDrop across a sequence of calls.

    The assertions expect 'num1' to be dropped by the very first call and
    by fit_transform, and to be kept by the plain calls that come after
    the first one. The calls are order-dependent, so they must stay in
    this sequence.
    """
    always_kept = ('num2', 'char')
    frame = _test_df()
    wrapped = FitOnly(ColDrop('num1'))

    # first call on a fresh stage: the column is expected to be dropped
    out = wrapped(frame)
    assert 'num1' not in out.columns
    for label in always_kept:
        assert label in out.columns

    # second plain call: the column is expected to be left in place
    frame = _test_df()
    out = wrapped(frame)
    assert 'num1' in out.columns
    for label in always_kept:
        assert label in out.columns

    # explicit fit_transform: the column is expected to be dropped again
    frame = _test_df()
    out = wrapped.fit_transform(frame)
    assert 'num1' not in out.columns
    for label in always_kept:
        assert label in out.columns

    # plain verbose call afterwards: the column is expected to be kept
    frame = _test_df()
    out = wrapped(frame, verbose=True)
    assert 'num1' in out.columns
    for label in always_kept:
        assert label in out.columns
def test_coldrop_one_col():
    """Check ColDrop on a single label through apply, fit and transform.

    apply, transform and fit_transform are expected to drop 'num1';
    fit is expected to return a frame that still has all three columns.
    Each of fit, transform and fit_transform is exercised both with and
    without verbose=True.
    """
    call_variants = ({}, {'verbose': True})
    frame = _test_df()
    assert 'num1' in frame.columns
    dropper = ColDrop('num1')

    out = dropper.apply(frame)
    assert 'num1' not in out.columns
    assert 'num2' in out.columns
    assert 'char' in out.columns

    # fit on a stage with nothing to fit should leave the columns untouched
    for extra in call_variants:
        out = dropper.fit(frame, **extra)
        for label in ('num1', 'num2', 'char'):
            assert label in out.columns

    # transform and fit_transform should give the same outcome as apply
    for method in (dropper.transform, dropper.fit_transform):
        for extra in call_variants:
            out = method(frame, **extra)
            assert 'num1' not in out.columns
            assert 'num2' in out.columns
            assert 'char' in out.columns
def test_pipeline_error(time):
    """Check that pipeline-level calls raise PipelineApplicationError.

    The pipeline's first stage compares 'num1' with 'num3'; fit, transform
    and fit_transform are each expected to raise when run on the test frame.
    The `time` argument is forwarded as-is to every pipeline call.
    """
    pipeline = PdPipeline([
        ColByFrameFunc(
            "Equality",
            lambda frame: frame['num1'] == frame['num3'],
        ),
        ColDrop("B"),
    ])

    # same expectation for all three entry points, each on a fresh frame
    for method_name in ('fit', 'transform', 'fit_transform'):
        frame = _test_df()
        entry_point = getattr(pipeline, method_name)
        with pytest.raises(PipelineApplicationError):
            entry_point(frame, verbose=True, time=time)
def test_coldrop_missing_col():
    """Check ColDrop when asked to drop the label 'num3'.

    With default settings every entry point is expected to raise
    FailedPreconditionError, unless exraise=False is passed, in which case
    the input frame is expected back unchanged. With errors='ignore' no
    entry point is expected to raise and the frame comes back unchanged.
    """
    frame = _test_df()
    assert 'num1' in frame.columns

    strict = ColDrop('num3')
    with pytest.raises(FailedPreconditionError):
        strict.apply(frame)
    assert strict.apply(frame, exraise=False).equals(frame)

    # fit, transform and fit_transform are expected to mirror apply here:
    # raise by default (verbose or not), pass the frame through otherwise
    for method_name in ('fit', 'transform', 'fit_transform'):
        entry_point = getattr(strict, method_name)
        with pytest.raises(FailedPreconditionError):
            entry_point(frame)
        with pytest.raises(FailedPreconditionError):
            entry_point(frame, verbose=True)
        assert entry_point(frame, exraise=False).equals(frame)

    # errors='ignore': no exception expected from any entry point
    lenient = ColDrop('num3', errors='ignore')
    for method_name in ('apply', 'fit', 'fit_transform', 'transform'):
        out = getattr(lenient, method_name)(frame)
        assert out.equals(frame)
def test_fitonly_w_coldrop_missing_col():
    """Check that a FitOnly-wrapped ColDrop('num3') raises on apply.

    The wrapped stage is expected to surface FailedPreconditionError
    rather than swallow it.
    """
    frame = _test_df()
    wrapped = FitOnly(ColDrop('num3'))
    with pytest.raises(FailedPreconditionError):
        wrapped.apply(frame)