Ejemplo n.º 1
0
def test_log_drop():
    df = _some_df()
    log_stage = Log(drop=True)
    res_df = log_stage(df)
    assert 'rank' in res_df.columns
    assert 'ph' in res_df.columns
    assert 'rank_log' not in res_df.columns
    assert 'ph_log' not in res_df.columns
    assert res_df['rank'][1] == 0
    assert_approx_equal(res_df['rank'][2], 2.079441, significant=5)
    assert_approx_equal(res_df['rank'][3], 1.098612, significant=5)
    assert_approx_equal(res_df['ph'][1], 1.163151, significant=5)
    assert_approx_equal(res_df['ph'][2], 1.974081, significant=5)
    assert_approx_equal(res_df['ph'][3], 2.493205, significant=5)

    # see only transform (no fit) when already fitted
    df2 = _some_df2()
    res_df2 = log_stage(df2)
    assert 'rank' in res_df2.columns
    assert 'ph' in res_df2.columns
    assert 'rank_log' not in res_df2.columns
    assert 'ph_log' not in res_df2.columns
    assert_approx_equal(res_df2['rank'][1], 1.098612, significant=5)
    assert_approx_equal(res_df2['rank'][2], 1.609437, significant=5)
    assert res_df2['rank'][3] == 0
    assert_approx_equal(res_df2['ph'][1], 1.481604, significant=5)
    assert_approx_equal(res_df2['ph'][2], 1.808288, significant=5)
    assert_approx_equal(res_df2['ph'][3], 0.262364, significant=5)
Ejemplo n.º 2
0
def test_log_non_neg_n_const_shift():
    df = _non_neg_df()
    log_stage = Log(non_neg=True, const_shift=0.1)
    res_df = log_stage(df)
    assert 'rank' in res_df.columns
    assert 'ph' in res_df.columns
    for col in df.columns:
        for i in df.index:
            assert res_df[col][i] == df[col][i]
    assert_approx_equal(res_df['rank_log'][1], -2.302585, significant=5)
    assert_approx_equal(res_df['rank_log'][2], 2.312534, significant=5)
    assert_approx_equal(res_df['rank_log'][3], 1.629240, significant=5)
    assert_approx_equal(res_df['ph_log'][1], 1.193922, significant=5)
    assert_approx_equal(res_df['ph_log'][2], 1.987874, significant=5)
    assert_approx_equal(res_df['ph_log'][3], 2.501435, significant=5)

    # see only transform (no fit) when already fitted
    df2 = _non_neg_df2()
    res_df2 = log_stage(df2, verbose=True)
    assert 'rank' in res_df2.columns
    assert 'ph' in res_df2.columns
    for col in df2.columns:
        for i in df2.index:
            assert res_df2[col][i] == df2[col][i]
    assert np.isnan(res_df2['rank_log'][1])
    assert_approx_equal(res_df2['rank_log'][2], 1.960094, significant=5)
    assert_approx_equal(res_df2['rank_log'][3], 1.131402, significant=5)
    assert_approx_equal(res_df2['ph_log'][1], 1.504077, significant=5)
    assert_approx_equal(res_df2['ph_log'][2], 1.824549, significant=5)
    assert_approx_equal(res_df2['ph_log'][3], 0.336472, significant=5)
Ejemplo n.º 3
0
def test_log_non_neg():
    df = _non_neg_df()
    log_stage = Log(non_neg=True)
    res_df = log_stage(df)
    assert 'rank' in res_df.columns
    assert 'ph' in res_df.columns
    for col in df.columns:
        for i in df.index:
            assert res_df[col][i] == df[col][i]
    assert res_df['rank_log'][1] == -np.inf
    assert_approx_equal(res_df['rank_log'][2], 2.302585, significant=5)
    assert_approx_equal(res_df['rank_log'][3], 1.609436, significant=5)
    assert_approx_equal(res_df['ph_log'][1], 1.163151, significant=5)
    assert_approx_equal(res_df['ph_log'][2], 1.974081, significant=5)
    assert_approx_equal(res_df['ph_log'][3], 2.493205, significant=5)

    # see only transform (no fit) when already fitted
    df2 = _non_neg_df2()
    res_df2 = log_stage(df2, verbose=True)
    assert 'rank' in res_df2.columns
    assert 'ph' in res_df2.columns
    for col in df2.columns:
        for i in df2.index:
            assert res_df2[col][i] == df2[col][i]
    assert np.isnan(res_df2['rank_log'][1])
    assert_approx_equal(res_df2['rank_log'][2], 1.945910, significant=5)
    assert_approx_equal(res_df2['rank_log'][3], 1.098612, significant=5)
    assert_approx_equal(res_df2['ph_log'][1], 1.481604, significant=5)
    assert_approx_equal(res_df2['ph_log'][2], 1.808288, significant=5)
    assert_approx_equal(res_df2['ph_log'][3], 0.262364, significant=5)

    # check fit_transform when already fitted
    df2 = _some_df2()
    res_df2 = log_stage.fit_transform(df2)
    assert 'rank' in res_df2.columns
    assert 'ph' in res_df2.columns
    for col in df2.columns:
        for i in df2.index:
            assert res_df2[col][i] == df2[col][i]
    assert_approx_equal(res_df2['rank_log'][1], 1.098612, significant=5)
    assert_approx_equal(res_df2['rank_log'][2], 1.609437, significant=5)
    assert res_df2['rank_log'][3] == 0
    assert_approx_equal(res_df2['ph_log'][1], 1.481604, significant=5)
    assert_approx_equal(res_df2['ph_log'][2], 1.808288, significant=5)
    assert_approx_equal(res_df2['ph_log'][3], 0.262364, significant=5)
Ejemplo n.º 4
0
def test_log_with_verbose():
    df = _some_df()
    log_stage = Log()
    res_df = log_stage(df, verbose=True)
    assert 'lbl' in res_df.columns
    assert 'rank' in res_df.columns
    assert 'ph' in res_df.columns
    for col in df.columns:
        for i in df.index:
            assert res_df[col][i] == df[col][i]
    assert res_df['rank_log'][1] == 0
    assert_approx_equal(res_df['rank_log'][2], 2.079441, significant=5)
    assert_approx_equal(res_df['rank_log'][3], 1.098612, significant=5)
    assert_approx_equal(res_df['ph_log'][1], 1.163151, significant=5)
    assert_approx_equal(res_df['ph_log'][2], 1.974081, significant=5)
    assert_approx_equal(res_df['ph_log'][3], 2.493205, significant=5)