def test_partial_correlations():
    """Compare partial correlations of a 4x3 data set with a hard-coded matrix."""
    # four observations (rows) of three variables (columns)
    observations = pd.DataFrame([
        [12, 14, 15],
        [24, 12, 52],
        [35, 12, 41],
        [23, 12, 42],
    ])

    # the reference matrix is labelled 0..2 on both axes
    labels = [0, 1, 2]
    reference = pd.DataFrame(
        [
            [1.0, -0.730955, -0.50616],
            [-0.730955, 1.0, -0.928701],
            [-0.50616, -0.928701, 1.0],
        ],
        columns=labels,
        index=labels,
    )

    assert_almost_equal(partial_correlations(observations), reference.values)
def test_partial_correlations_num_columns_greater():
    """Check the result for data that has more columns than rows."""
    # two observations of three variables
    wide_data = pd.DataFrame([
        [23, 12, 23],
        [42, 25, 21],
    ])

    # expected matrix: NaN everywhere except ones on the diagonal
    nan_with_unit_diagonal = np.full((3, 3), np.nan)
    np.fill_diagonal(nan_with_unit_diagonal, 1.0)
    labels = [0, 1, 2]
    reference = pd.DataFrame(nan_with_unit_diagonal, columns=labels, index=labels)

    assert_almost_equal(partial_correlations(wide_data), reference.values)
def test_partial_correlations_catch_linalgerror():
    """Check the result for data whose four columns are all identical.

    Identical columns make the covariance matrix singular; the expected
    output is NaN off the diagonal and 1.0 on it.
    """
    # NOTE(review): test_partial_correlations_with_zero_det feeds the same
    # data but expects -1 off-diagonals -- confirm which expectation matches
    # the current partial_correlations behavior.
    identical_columns = pd.DataFrame([
        [10, 10, 10, 10],
        [12, 12, 12, 12],
        [15, 15, 15, 15],
        [20, 20, 20, 20],
        [11, 11, 11, 11],
    ])

    # expected matrix: NaN everywhere except ones on the diagonal
    nan_with_unit_diagonal = np.full((4, 4), np.nan)
    np.fill_diagonal(nan_with_unit_diagonal, 1.0)
    labels = [0, 1, 2, 3]
    reference = pd.DataFrame(nan_with_unit_diagonal, columns=labels, index=labels)

    assert_almost_equal(partial_correlations(identical_columns), reference.values)
def calculate_kmo(x):
    """
    Compute the Kaiser-Meyer-Olkin (KMO) criterion per item and overall.

    The statistic represents the degree to which each observed variable
    is predicted, without error, by the other variables in the dataset.
    In general, a KMO < 0.6 is considered inadequate.

    Parameters
    ----------
    x : array-like
        The array from which to calculate KMOs.

    Returns
    -------
    kmo_per_variable : numpy array
        The KMO score per item.
    kmo_total : float
        The KMO score overall.
    """
    def _kmo_ratio(corr_part, partial_part):
        # share of squared correlation in the combined squared total
        return corr_part / (corr_part + partial_part)

    # partial correlations first, then the pair-wise correlations
    partials = partial_correlations(x)
    pairwise = corr(x)

    # zero both diagonals in place, then square every element
    np.fill_diagonal(pairwise, 0)
    np.fill_diagonal(partials, 0)
    partials_sq = partials ** 2
    pairwise_sq = pairwise ** 2

    # per-item KMO: sums taken along axis 0
    kmo_per_item = _kmo_ratio(
        np.sum(pairwise_sq, axis=0),
        np.sum(partials_sq, axis=0),
    )

    # overall KMO: sums taken over every element
    kmo_total = _kmo_ratio(np.sum(pairwise_sq), np.sum(partials_sq))

    return kmo_per_item, kmo_total
def test_partial_correlations_with_zero_det():
    """Check the result for data whose four columns are all identical.

    Identical columns make the covariance matrix singular; the expected
    output is (almost exactly) -1 off the diagonal and 1.0 on it.
    """
    # NOTE(review): test_partial_correlations_catch_linalgerror feeds the
    # same data but expects NaN off-diagonals -- confirm which expectation
    # matches the current partial_correlations behavior.
    identical_columns = pd.DataFrame([
        [10, 10, 10, 10],
        [12, 12, 12, 12],
        [15, 15, 15, 15],
        [20, 20, 20, 20],
        [11, 11, 11, 11],
    ])

    # literals kept at full precision, including the floating-point noise
    reference = pd.DataFrame([
        [1.0, -0.9999999999999998, -0.9999999999999998, -0.9999999999999998],
        [-1.0000000000000004, 1.0, -1.0, -1.0],
        [-1.0000000000000004, -1.0, 1.0, -1.0],
        [-1.0000000000000004, -1.0, -1.0, 1.0],
    ])

    assert_almost_equal(partial_correlations(identical_columns), reference.values)