Ejemplo n.º 1
0
def test_vifs():
    """Check variance_inflation_factors on a frame with a near-collinear column.

    Column 'c' is built as a linear combination of columns 'a' and 'b' plus a
    small noise term, so the test expects it to carry the largest factor and
    expects that factor to exceed 5.
    """
    # A locally seeded generator makes the fixture reproducible from run to
    # run without touching NumPy's global random state.
    rng = np.random.RandomState(0)
    df = pd.DataFrame(rng.randn(100, 2), columns=['a', 'b'])
    noise = rng.randn(100)
    df['c'] = (3 * df['a']) + (2 * df['b']) + (.5 * noise)

    vifs = variance_inflation_factors(df)

    # isinstance (not an exact type comparison) so Series subclasses pass too.
    assert isinstance(vifs, pd.Series)
    assert vifs.max() > 5.0
    assert vifs.idxmax() == 'c'
Ejemplo n.º 2
0
def test_flagging():
    """Exercise Diagnostics.flag_all with thresholds, include and exclude lists.

    The frame has two random columns 'a' and 'b' and a third column 'c' built
    as a linear combination of them plus a small noise term. Three scenarios
    are checked:

    1. Thresholds of ``x > 0`` for 'VIFs' and 'RowMahalanobisDistances' are
       expected to flag every row index and every column index.
    2. A 'VIFs' threshold equal to the largest factor, with only 'VIFs'
       included, is expected to flag no rows and only column index 2.
    3. Excluding the seven listed diagnostics is expected to flag nothing.
    """
    # A locally seeded generator makes the fixture reproducible from run to
    # run without touching NumPy's global random state.
    rng = np.random.RandomState(0)
    df = pd.DataFrame(rng.randn(100, 2), columns=['a', 'b'])
    noise = rng.randn(100)
    df['c'] = (3 * df['a']) + (2 * df['b']) + (.5 * noise)
    n_rows, n_cols = df.shape

    diagnostics = Diagnostics(df)

    # Scenario 1: everything should be flagged.
    thresholds = {
        'VIFs': (lambda x: x > 0),
        'RowMahalanobisDistances': (lambda x: x > 0),
    }
    rows, cols = diagnostics.flag_all(thresholds)
    assert np.array_equal(rows, np.arange(n_rows))
    assert np.array_equal(cols, np.arange(n_cols))

    # Scenario 2: only the column holding the maximum VIF meets ``x >= vif``;
    # the test expects that to be positional index 2, i.e. column 'c'.
    vif = variance_inflation_factors(df).max()
    rows, cols = diagnostics.flag_all({'VIFs': (lambda x: x >= vif)},
                                      include=['VIFs'])
    assert np.array_equal(rows, [])
    assert np.array_equal(cols, [2])

    # Scenario 3: with these diagnostics excluded nothing should be flagged
    # (presumably this list covers every available diagnostic; confirm
    # against the Diagnostics class).
    excluded = [
        'VIFs', 'ConditionIndices',
        'Eigenvalues', 'CorrelationMatrix',
        'RowMahalanobisDistances', 'ColumnMahalanobisDistances',
        'Variances',
    ]
    rows, cols = diagnostics.flag_all(exclude=excluded)
    assert np.array_equal(rows, [])
    assert np.array_equal(cols, [])