예제 #1
0
def test_winsorize():
    """Check ``winsorize`` against known-good outputs on a 20-value sample.

    The sample contains one extreme high value (1053) and one low value
    (-40). Three configurations are exercised:

    * quantile cutoff ``[0.05, .95]`` without cutoff replacement,
    * std cutoff ``[2, 2]`` without cutoff replacement,
    * std cutoff ``[2, 2]`` with ``replace_with_cutoff=True``.

    Comparisons are element-wise via ``np.testing`` so that a failure
    reports which entries differ, instead of counting matches against a
    hard-coded length or comparing only a rounded mean.
    """
    outlier_test = pd.DataFrame([
        92, 19, 101, 58, 1053, 91, 26, 78, 10, 13, -40, 101, 86, 85, 15, 89,
        89, 28, -5, 41
    ])

    # Quantile cutoff: the expected output has 1053 replaced by 101 and
    # -40 replaced by -5, i.e. by values already present in the sample.
    out = winsorize(outlier_test,
                    cutoff={
                        'quantile': [0.05, .95]
                    },
                    replace_with_cutoff=False).values.squeeze()
    correct_result = np.array([
        92, 19, 101, 58, 101, 91, 26, 78, 10, 13, -5, 101, 86, 85, 15, 89, 89,
        28, -5, 41
    ])
    np.testing.assert_array_equal(out, correct_result)

    # Std cutoff: only the high outlier changes in the expected output;
    # -40 is expected to stay as it is.
    out = winsorize(outlier_test,
                    cutoff={
                        'std': [2, 2]
                    },
                    replace_with_cutoff=False).values.squeeze()
    correct_result = np.array([
        92, 19, 101, 58, 101, 91, 26, 78, 10, 13, -40, 101, 86, 85, 15, 89, 89,
        28, -5, 41
    ])
    np.testing.assert_array_equal(out, correct_result)

    # Std cutoff with replacement: 1053 is expected to become 556.97961997,
    # which is approximately mean + 2 * sample standard deviation (ddof=1)
    # of the input. The value is a float, so compare with a tolerance
    # rather than exact equality.
    out = winsorize(outlier_test,
                    cutoff={
                        'std': [2, 2]
                    },
                    replace_with_cutoff=True).values.squeeze()
    correct_result = np.array([
        92., 19., 101., 58., 556.97961997, 91., 26., 78., 10., 13., -40., 101.,
        86., 85., 15., 89., 89., 28., -5., 41.
    ])
    np.testing.assert_allclose(out, correct_result, rtol=1e-6)
예제 #2
0
def test_winsorize():
    """Verify ``winsorize`` element by element on a small outlier sample.

    The 20-value input holds an extreme high value (1053) and a low value
    (-40). The test runs a quantile cutoff and a standard-deviation cutoff
    without cutoff replacement, then the standard-deviation cutoff with
    ``replace_with_cutoff=True``.

    Each result is compared with ``np.testing`` helpers. The original
    match-count and rounded-mean checks could pass on wrong output and gave
    no detail on failure.
    """
    outlier_test = pd.DataFrame([
        92, 19, 101, 58, 1053, 91, 26, 78, 10, 13,
        -40, 101, 86, 85, 15, 89, 89, 28, -5, 41,
    ])

    def _winsorized(cutoff, replace_with_cutoff):
        # Run winsorize on the shared sample and flatten the single-column
        # result to a 1-D array for comparison.
        return winsorize(outlier_test,
                         cutoff=cutoff,
                         replace_with_cutoff=replace_with_cutoff
                         ).values.squeeze()

    # Quantile cutoff: the expected output has 1053 replaced by 101 and
    # -40 replaced by -5, both of which already occur in the sample.
    out = _winsorized({"quantile": [0.05, 0.95]}, False)
    correct_result = np.array([
        92, 19, 101, 58, 101, 91, 26, 78, 10, 13,
        -5, 101, 86, 85, 15, 89, 89, 28, -5, 41,
    ])
    np.testing.assert_array_equal(out, correct_result)

    # Std cutoff: only the high outlier is expected to change; -40 stays.
    out = _winsorized({"std": [2, 2]}, False)
    correct_result = np.array([
        92, 19, 101, 58, 101, 91, 26, 78, 10, 13,
        -40, 101, 86, 85, 15, 89, 89, 28, -5, 41,
    ])
    np.testing.assert_array_equal(out, correct_result)

    # Std cutoff with replacement: 1053 is expected to become 556.97961997
    # (approximately mean + 2 * sample standard deviation, ddof=1, of the
    # input). Floats are compared with a relative tolerance.
    out = _winsorized({"std": [2, 2]}, True)
    correct_result = np.array([
        92.0, 19.0, 101.0, 58.0, 556.97961997, 91.0, 26.0, 78.0, 10.0, 13.0,
        -40.0, 101.0, 86.0, 85.0, 15.0, 89.0, 89.0, 28.0, -5.0, 41.0,
    ])
    np.testing.assert_allclose(out, correct_result, rtol=1e-6)