Ejemplo n.º 1
0
def test_pipeline_stage_to_pipeline_addition():
    """Check that ``stage + pipeline`` yields a working two-stage pipeline.

    A single-stage pipeline is extended by adding a stage on its left-hand
    side; the result is applied to the frame from ``_test_df()`` and must
    drop both numeric columns while keeping ``char``.
    """
    first_drop = SilentDropStage('num1')
    second_drop = SilentDropStage('num2')

    single = PdPipeline([first_drop])
    assert len(single) == 1

    # Stage is the left operand, pipeline the right operand.
    combined = second_drop + single
    assert len(combined) == 2

    frame = _test_df()
    result = combined.apply(frame, verbose=True)
    for dropped in ('num1', 'num2'):
        assert dropped not in result.columns
    assert 'char' in result.columns
Ejemplo n.º 2
0
def test_pipeline_slice():
    """Check that ``pipeline[0:2]`` keeps only the first two stages.

    The sliced pipeline is applied to the frame from ``_test_df()``: the two
    numeric columns must be gone and ``char`` must survive.
    """
    stages = [SilentDropStage(col) for col in ('num1', 'num2', 'char')]
    full = PdPipeline(stages)
    assert len(full) == 3

    head = full[0:2]
    assert len(head) == 2

    frame = _test_df()
    result = head.apply(frame, verbose=True)
    for dropped in ('num1', 'num2'):
        assert dropped not in result.columns
    assert 'char' in result.columns
Ejemplo n.º 3
0
def test_pipeline_slice_by_name():
    """Check selecting pipeline stages by their ``name`` labels.

    Indexing with a list of names must give a two-stage pipeline, indexing
    that result with a kept name must give back the matching stage, and
    indexing it with the name that was left out must raise ``ValueError``
    with the expected message.
    """
    labelled_columns = (
        ('num1', 'dropNum1'),
        ('num2', 'dropNum2'),
        ('char', 'dropChar'),
    )
    stages = [
        SilentDropStage(col, name=label) for col, label in labelled_columns
    ]
    full = PdPipeline(stages)
    assert len(full) == 3

    subset = full[['dropNum1', 'dropNum2']]
    assert len(subset) == 2
    assert subset['dropNum1'] == stages[0]

    with pytest.raises(ValueError) as excinfo:
        subset['dropChar']
    assert str(excinfo.value) == "'dropChar' is not exist."

    frame = _test_df()
    result = subset.apply(frame, verbose=True)
    for dropped in ('num1', 'num2'):
        assert dropped not in result.columns
    assert 'char' in result.columns
Ejemplo n.º 4
0
def test_pipeline_index():
    """Check that integer indexing returns each stage in insertion order.

    Every indexed stage is also called directly on the frame from
    ``_test_df()`` to confirm that its own column is absent from the output.
    """
    frame = _test_df()
    columns = ('num1', 'num2', 'char')
    stages = [SilentDropStage(col) for col in columns]
    pipeline = PdPipeline(stages)
    assert len(pipeline) == 3

    for position, (col, stage) in enumerate(zip(columns, stages)):
        assert pipeline[position] == stage
        assert col not in pipeline[position](frame).columns
Ejemplo n.º 5
0
def test_pipeline_error(time):
    """Check that stage failures surface as ``PipelineApplicationError``.

    A pipeline whose first stage compares ``num1`` with ``num3`` is run
    through ``fit``, ``transform`` and ``fit_transform`` in turn, each on a
    fresh frame from ``_test_df()``; every call is expected to raise.

    Args:
        time: Forwarded unchanged as the ``time`` keyword of each call.
    """
    frame = _test_df()

    def _num1_equals_num3(df):
        return df['num1'] == df['num3']

    failing = PdPipeline(
        [ColByFrameFunc("Equality", _num1_equals_num3), ColDrop("B")]
    )

    # Same expectation for every entry point, checked in the original order.
    for position, method_name in enumerate(
        ('fit', 'transform', 'fit_transform')
    ):
        if position:
            # Each subsequent call gets its own untouched frame.
            frame = _test_df()
        with pytest.raises(PipelineApplicationError):
            getattr(failing, method_name)(frame, verbose=True, time=time)
Ejemplo n.º 6
0
def test_two_stage_pipeline_stage():
    """Exercise a two-stage pipeline through each application entry point.

    Covers ``apply``, ``fit``, ``transform``, ``fit_transform`` and a custom
    ``transformer_getter`` that exposes only the first stage of the pipeline.
    """
    drop_num1 = SilentDropStage('num1')
    drop_num2 = SilentDropStage('num2')
    pipeline = PdPipeline([drop_num1, drop_num2])
    assert len(pipeline) == 2
    df = _test_df()
    res_df = pipeline.apply(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' not in res_df.columns
    assert 'char' in res_df.columns
    # Smoke check: building the string representation must not raise.
    str(pipeline)

    # test fit: the returned frame is expected to keep every column
    df = _test_df()
    res_df = pipeline.fit(df, verbose=True)
    for x in ['num1', 'num2', 'char']:
        assert x in res_df.columns

    # test transform
    df = _test_df()
    res_df = pipeline.transform(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' not in res_df.columns
    assert 'char' in res_df.columns

    # test fit_transform: previously the result was computed but never
    # checked; it is expected to match the transform outcome above.
    df = _test_df()
    res_df = pipeline.fit_transform(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' not in res_df.columns
    assert 'char' in res_df.columns

    # test get_transformer
    def first_stage_only(pline):
        # Slice the pipeline handed to the getter, rather than silently
        # closing over the enclosing ``pipeline`` variable.
        return pline[:1]

    pipeline = PdPipeline(
        [drop_num1, drop_num2], transformer_getter=first_stage_only
    )
    transformer = pipeline.get_transformer()
    res_df = transformer(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' in res_df.columns
    assert 'char' in res_df.columns
Ejemplo n.º 7
0
def test_pipeline_to_int_addition():
    """Check that adding an ``int`` to a pipeline raises ``TypeError``."""
    pipeline = PdPipeline([SilentDropStage('num1')])
    # Only the raising expression belongs inside the context manager: any
    # statement placed after it would be skipped once the exception is thrown.
    with pytest.raises(TypeError):
        _ = pipeline + 43