Ejemplo n.º 1
0
def test_coldrop_non_str_lbl():
    """Check that ColDrop can drop a column whose label is not a string."""
    frame = _test_df2()
    assert 2 in frame.columns

    result = ColDrop(2).apply(frame)
    remaining = result.columns
    # Only the integer-labelled column may disappear; the other labels
    # (a string and a boolean) have to survive.
    assert 2 not in remaining
    assert 'num1' in remaining
    assert False in remaining
Ejemplo n.º 2
0
def test_coldrop_lambda():
    """Check that ColDrop accepts a callable selecting columns to drop."""
    frame = _test_df()
    for label in ('num1', 'num2'):
        assert label in frame.columns

    # The callable reads ``.name`` on its argument and flags every column
    # whose name contains 'num'.
    dropper = ColDrop(lambda col: 'num' in col.name)
    remaining = dropper.apply(frame).columns

    for label in ('num1', 'num2'):
        assert label not in remaining
    assert 'char' in remaining
Ejemplo n.º 3
0
def test_coldrop_multi_col():
    """Check that ColDrop drops every label of a list in one application."""
    targets = ['num1', 'num2']
    frame = _test_df()
    for label in targets:
        assert label in frame.columns

    remaining = ColDrop(['num1', 'num2']).apply(frame).columns

    for label in targets:
        assert label not in remaining
    # A column that was not listed must be left in place.
    assert 'char' in remaining
Ejemplo n.º 4
0
def test_fitonly_w_coldrop():
    """Check a FitOnly-wrapped ColDrop across repeated applications.

    The first plain call and the ``fit_transform`` call are expected to
    drop 'num1'; plain calls made after the first one are expected to
    return a frame that still has all three columns.
    """
    all_labels = ('num1', 'num2', 'char')
    wrapped = FitOnly(ColDrop('num1'))

    # First call on a fresh stage: the wrapped drop is carried out.
    out = wrapped(_test_df())
    assert 'num1' not in out.columns
    for label in ('num2', 'char'):
        assert label in out.columns

    # Second plain call: nothing is dropped any more.
    out = wrapped(_test_df())
    for label in all_labels:
        assert label in out.columns

    # An explicit fit_transform drops the column again.
    out = wrapped.fit_transform(_test_df())
    assert 'num1' not in out.columns
    for label in ('num2', 'char'):
        assert label in out.columns

    # A later plain call, this time verbose, again leaves the frame whole.
    out = wrapped(_test_df(), verbose=True)
    for label in all_labels:
        assert label in out.columns
Ejemplo n.º 5
0
def test_coldrop_one_col():
    """Check ColDrop with a single label through every stage entry point."""
    frame = _test_df()
    assert 'num1' in frame.columns
    dropper = ColDrop('num1')

    dropped = dropper.apply(frame)
    assert 'num1' not in dropped.columns
    for label in ('num2', 'char'):
        assert label in dropped.columns

    # fit has to be a no-op for an unfittable stage, with and without
    # verbose output: the returned frame keeps all of its columns
    for kwargs in ({}, {'verbose': True}):
        fitted = dropper.fit(frame, **kwargs)
        for label in ('num1', 'num2', 'char'):
            assert label in fitted.columns

    # transform and fit_transform have to match apply for an unfittable
    # stage, again with and without verbose output
    for method in (dropper.transform, dropper.fit_transform):
        for kwargs in ({}, {'verbose': True}):
            out = method(frame, **kwargs)
            assert 'num1' not in out.columns
            for label in ('num2', 'char'):
                assert label in out.columns
Ejemplo n.º 6
0
def test_pipeline_error(time):
    """Check that a failure inside a stage surfaces as a pipeline-level error.

    The first stage compares 'num1' against 'num3'; the test expects fit,
    transform and fit_transform to each raise PipelineApplicationError.
    ``time`` is forwarded unchanged as the ``time`` keyword of each call.
    """
    pipeline = PdPipeline([
        ColByFrameFunc("Equality", lambda df: df['num1'] == df['num3']),
        ColDrop("B"),
    ])

    # Same expectation for every entry point, each on a fresh frame.
    for method in (pipeline.fit, pipeline.transform, pipeline.fit_transform):
        frame = _test_df()
        with pytest.raises(PipelineApplicationError):
            method(frame, verbose=True, time=time)
Ejemplo n.º 7
0
def test_coldrop_missing_col():
    """Check how ColDrop reacts when asked to drop the label 'num3'.

    With default settings every entry point is expected to raise
    FailedPreconditionError unless ``exraise=False`` is passed, in which
    case the input frame comes back unchanged. With ``errors='ignore'``
    every entry point is expected to return an equal frame.
    """
    frame = _test_df()
    assert 'num1' in frame.columns

    strict = ColDrop('num3')
    with pytest.raises(FailedPreconditionError):
        strict.apply(frame)
    assert strict.apply(frame, exraise=False).equals(frame)

    # fit, transform and fit_transform follow the same pattern as apply:
    # raise by default (verbose or not), pass the frame through when the
    # caller opts out of raising
    for method in (strict.fit, strict.transform, strict.fit_transform):
        with pytest.raises(FailedPreconditionError):
            method(frame)
        with pytest.raises(FailedPreconditionError):
            method(frame, verbose=True)
        assert method(frame, exraise=False).equals(frame)

    # with errors='ignore' no entry point raises and the frame is unchanged
    lenient = ColDrop('num3', errors='ignore')
    entry_points = (
        lenient.apply,
        lenient.fit,
        lenient.fit_transform,
        lenient.transform,
    )
    for method in entry_points:
        assert method(frame).equals(frame)
Ejemplo n.º 8
0
def test_fitonly_w_coldrop_missing_col():
    """Check that applying a FitOnly-wrapped ColDrop('num3') raises."""
    df = _test_df()
    # 'num3' is presumably absent from the frame built by _test_df
    # (verify against that helper); wrapping the stage in FitOnly is
    # expected not to hide the resulting precondition failure.
    stage = FitOnly(ColDrop('num3'))
    with pytest.raises(FailedPreconditionError):
        stage.apply(df)