Example #1
0
def test_diagnostics(capfd):
    """Smoke-test ``Diagnostics`` on a random 10x5 frame.

    Asserts that the instance exposes a ``results`` attribute containing a
    ``'Variances'`` entry, and that ``summary()`` writes each of the three
    expected section headings to stdout (captured through ``capfd``).
    """
    frame = pd.DataFrame(np.random.randn(10, 5))
    diag = Diagnostics(frame)

    assert hasattr(diag, 'results')
    assert 'Variances' in diag.results

    diag.summary()
    captured_out, _ = capfd.readouterr()

    # Each section heading must appear in the printed summary.
    expected_headings = (
        'Collinearity summary:',
        'Outlier summary:',
        'Validity summary:',
    )
    for heading in expected_headings:
        assert heading in captured_out
Example #2
0
def test_flagging():
    """Exercise ``Diagnostics.flag_all`` under three threshold setups.

    The frame has two random columns plus a third built as a noisy linear
    combination of them. The test asserts that:

    1. thresholds of ``x > 0`` on VIFs and row Mahalanobis distances flag
       every row and every column;
    2. restricting to VIFs with a threshold equal to the largest VIF flags
       no rows and only column index 2;
    3. excluding all of the listed diagnostics flags nothing.
    """
    frame = pd.DataFrame(np.random.randn(100, 2), columns=['a', 'b'])
    jitter = np.random.randn(100)
    frame['c'] = 3 * frame['a'] + 2 * frame['b'] + 0.5 * jitter
    diag = Diagnostics(frame)
    n_rows, n_cols = frame.shape

    def is_positive(x):
        return x > 0

    # Everything should be flagged
    flagged_rows, flagged_cols = diag.flag_all({
        'VIFs': is_positive,
        'RowMahalanobisDistances': is_positive,
    })
    assert np.array_equal(flagged_rows, range(n_rows))
    assert np.array_equal(flagged_cols, range(n_cols))

    max_vif = variance_inflation_factors(frame).max()

    def reaches_max_vif(x):
        return x >= max_vif

    # Only the column carrying the largest VIF is expected to be flagged.
    flagged_rows, flagged_cols = diag.flag_all(
        {'VIFs': reaches_max_vif},
        include=['VIFs'],
    )
    assert np.array_equal(flagged_rows, [])
    assert np.array_equal(flagged_cols, [2])

    # With every listed diagnostic excluded, nothing is expected to be flagged.
    excluded = [
        'VIFs',
        'ConditionIndices',
        'Eigenvalues',
        'CorrelationMatrix',
        'RowMahalanobisDistances',
        'ColumnMahalanobisDistances',
        'Variances',
    ]
    flagged_rows, flagged_cols = diag.flag_all(exclude=excluded)
    assert np.array_equal(flagged_rows, [])
    assert np.array_equal(flagged_cols, [])