Esempio n. 1
0
def test_partial_correlations():
    """Compare partial correlations of a 4x3 data set with reference values."""
    observations = pd.DataFrame([
        [12, 14, 15],
        [24, 12, 52],
        [35, 12, 41],
        [23, 12, 42],
    ])

    # Reference 3x3 matrix, labelled 0..2 on both axes.
    labels = [0, 1, 2]
    expected = pd.DataFrame(
        [[1.0, -0.730955, -0.50616],
         [-0.730955, 1.0, -0.928701],
         [-0.50616, -0.928701, 1.0]],
        columns=labels,
        index=labels,
    )

    assert_almost_equal(partial_correlations(observations), expected.values)
Esempio n. 2
0
def test_partial_correlations_num_columns_greater():
    """Expect NaN off-diagonals when the data has more columns than rows."""
    # Two rows, three columns.
    wide_data = pd.DataFrame([[23, 12, 23], [42, 25, 21]])

    # Expected result: 1.0 on the diagonal, NaN everywhere else.
    size = 3
    nan_matrix = np.full((size, size), np.nan)
    np.fill_diagonal(nan_matrix, 1.0)

    labels = list(range(size))
    expected = pd.DataFrame(nan_matrix, columns=labels, index=labels)

    assert_almost_equal(partial_correlations(wide_data), expected.values)
Esempio n. 3
0
def test_partial_correlations_catch_linalgerror():
    """Expect NaN off-diagonals when every column of the data is identical."""
    # All four columns are equal, so the covariance matrix will be singular.
    degenerate_data = pd.DataFrame([
        [10, 10, 10, 10],
        [12, 12, 12, 12],
        [15, 15, 15, 15],
        [20, 20, 20, 20],
        [11, 11, 11, 11],
    ])

    # Expected result: 1.0 on the diagonal, NaN everywhere else.
    size = 4
    nan_matrix = np.full((size, size), np.nan)
    np.fill_diagonal(nan_matrix, 1.0)

    labels = list(range(size))
    expected = pd.DataFrame(nan_matrix, columns=labels, index=labels)

    assert_almost_equal(partial_correlations(degenerate_data), expected.values)
Esempio n. 4
0
def calculate_kmo(x):
    """
    Compute the Kaiser-Meyer-Olkin (KMO) criterion per item and overall.

    The KMO statistic represents the degree to which each observed
    variable is predicted, without error, by the other variables in
    the dataset. In general, a KMO < 0.6 is considered inadequate.

    Both scores are the ratio of summed squared off-diagonal
    correlations to the sum of squared off-diagonal correlations plus
    squared off-diagonal partial correlations.

    Parameters
    ----------
    x : array-like
        The array from which to calculate KMOs.

    Returns
    -------
    kmo_per_item : numpy array
        The KMO score per item.
    kmo_total : float
        The KMO score overall.
    """
    # Partial correlations first, then the pair-wise correlations.
    partial = partial_correlations(x)
    pairwise = corr(x)

    # Zero the diagonals in place so that only off-diagonal terms
    # contribute to the sums below.
    np.fill_diagonal(pairwise, 0)
    np.fill_diagonal(partial, 0)

    # Element-wise squares of both matrices.
    squared_partial = partial ** 2
    squared_pairwise = pairwise ** 2

    # Per-item KMO: column-wise sums.
    item_partial = squared_partial.sum(axis=0)
    item_pairwise = squared_pairwise.sum(axis=0)
    kmo_per_item = item_pairwise / (item_pairwise + item_partial)

    # Overall KMO: sums over every element.
    total_pairwise = squared_pairwise.sum()
    total_partial = squared_partial.sum()
    kmo_total = total_pairwise / (total_pairwise + total_partial)

    return kmo_per_item, kmo_total
def test_partial_correlations_with_zero_det():
    """Compare partial correlations of identical-column data with reference values."""
    # All four columns are equal, so the covariance matrix will be singular.
    degenerate_data = pd.DataFrame([
        [10, 10, 10, 10],
        [12, 12, 12, 12],
        [15, 15, 15, 15],
        [20, 20, 20, 20],
        [11, 11, 11, 11],
    ])

    # Reference values; the first row and first column differ from -1.0
    # in the last digits, so the matrix is not exactly symmetric.
    first_row = [1.0,
                 -0.9999999999999998,
                 -0.9999999999999998,
                 -0.9999999999999998]
    remaining_rows = [
        [-1.0000000000000004, 1.0, -1.0, -1.0],
        [-1.0000000000000004, -1.0, 1.0, -1.0],
        [-1.0000000000000004, -1.0, -1.0, 1.0],
    ]
    expected = pd.DataFrame([first_row] + remaining_rows)

    assert_almost_equal(partial_correlations(degenerate_data), expected.values)