def test_vifs():
    """Check that variance_inflation_factors singles out a near-collinear column.

    Column 'c' is built as a linear combination of columns 'a' and 'b' plus
    a small noise term, so the test expects the largest VIF to exceed 5.0
    and to belong to 'c'.
    """
    # NOTE(review): the data comes from the unseeded global NumPy RNG, so the
    # assertions below hold statistically rather than deterministically --
    # consider seeding once a passing seed has been confirmed.
    df = pd.DataFrame(np.random.randn(100, 2), columns=['a', 'b'])
    noise = np.random.randn(100)
    df['c'] = (3*df['a']) + (2*df['b']) + (.5*noise)

    vifs = variance_inflation_factors(df)

    # isinstance instead of an exact type() comparison, so that a Series
    # subclass returned by the function is still accepted.
    assert isinstance(vifs, pd.Series)
    assert vifs.max() > 5.0
    assert vifs.idxmax() == 'c'
def test_flagging():
    """Exercise Diagnostics.flag_all with custom thresholds, include and exclude.

    Three scenarios are checked on a frame whose column 'c' is a noisy
    linear combination of 'a' and 'b':

    1. thresholds of ``x > 0`` on VIFs and row Mahalanobis distances are
       expected to flag every row and every column;
    2. a VIF threshold equal to the largest VIF, restricted to the 'VIFs'
       check, is expected to flag only column index 2 and no rows;
    3. with the listed checks excluded, nothing is expected to be flagged.
    """
    frame = pd.DataFrame(np.random.randn(100, 2), columns=['a', 'b'])
    jitter = np.random.randn(100)
    frame['c'] = 3 * frame['a'] + 2 * frame['b'] + 0.5 * jitter
    checker = Diagnostics(frame)

    def exceeds_zero(x):
        return x > 0

    # Scenario 1: everything should be flagged.
    flag_everything = {
        'VIFs': exceeds_zero,
        'RowMahalanobisDistances': exceeds_zero,
    }
    flagged_rows, flagged_cols = checker.flag_all(flag_everything)
    n_rows, n_cols = frame.shape
    assert np.array_equal(flagged_rows, range(n_rows))
    assert np.array_equal(flagged_cols, range(n_cols))

    # Scenario 2: only the column carrying the largest VIF should be flagged.
    largest_vif = variance_inflation_factors(frame).max()
    only_largest = {'VIFs': (lambda x: x >= largest_vif)}
    flagged_rows, flagged_cols = checker.flag_all(only_largest, include=['VIFs'])
    assert np.array_equal(flagged_rows, [])
    assert np.array_equal(flagged_cols, [2])

    # Scenario 3: with each of these checks excluded, nothing should be flagged.
    skipped_checks = [
        'VIFs',
        'ConditionIndices',
        'Eigenvalues',
        'CorrelationMatrix',
        'RowMahalanobisDistances',
        'ColumnMahalanobisDistances',
        'Variances',
    ]
    flagged_rows, flagged_cols = checker.flag_all(exclude=skipped_checks)
    assert np.array_equal(flagged_rows, [])
    assert np.array_equal(flagged_cols, [])