Example #1
0
def test_generalized_average():
    """Check the ordering and agreement of the generalized averages.

    With unequal inputs the "min", "geometric", "arithmetic" and "max"
    averages must be non-decreasing in that order; with equal inputs all
    four must coincide.
    """
    methods = ["min", "geometric", "arithmetic", "max"]

    # Unequal inputs: every mean is bounded above by the next one listed.
    low, high = 1, 2
    means = [_generalized_average(low, high, method) for method in methods]
    assert all(prev <= nxt for prev, nxt in zip(means, means[1:]))

    # Equal inputs: every averaging method must return the same value.
    same_a, same_b = 12, 12
    means = [_generalized_average(same_a, same_b, method) for method in methods]
    assert all(prev == nxt for prev, nxt in zip(means, means[1:]))
def test_generalized_average():
    """Verify that the four averaging methods are ordered consistently.

    The averages are requested in the order "min", "geometric",
    "arithmetic", "max". For the inputs (1, 2) they must form a
    non-decreasing sequence; for the inputs (12, 12) they must all be equal.
    """
    methods = ["min", "geometric", "arithmetic", "max"]

    def averages(x, y):
        # One average per method, in the same order as `methods`.
        return [_generalized_average(x, y, name) for name in methods]

    smallest, geometric, arithmetic, largest = averages(1, 2)
    assert smallest <= geometric <= arithmetic <= largest

    smallest, geometric, arithmetic, largest = averages(12, 12)
    assert smallest == geometric == arithmetic == largest
Example #3
0
def _ami(ab_cts, average_method='arithmetic'):
    """Compute adjusted mutual information (AMI) from a contingency table.

    Scores the association between two discrete categorical random
    variables, a and b, using the observed joint counts in ``ab_cts``.

    The code is adapted directly from the scikit-learn AMI implementation to
    accommodate a counts/contingency table as input instead of
    rows/instances:
    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html

    Parameters
    ----------
    ab_cts : np.ndarray [len(a_classes) x len(b_classes)]
        Counts for each combination of classes in random variables a and b,
        organized in a rectangular array (rows index a, columns index b).
    average_method : str
        Passed through to ``_generalized_average`` to combine the two
        entropies into the normalizer. See the sklearn documentation for
        details.

    Returns
    -------
    ami : float
        Adjusted mutual information score for variables a and b.
    """
    # Marginal counts of a (row sums) and b (column sums), each normalized
    # to relative frequencies.
    row_totals = np.sum(ab_cts, axis=1)
    col_totals = np.sum(ab_cts, axis=0)
    a_freq = row_totals / np.sum(row_totals)
    b_freq = col_totals / np.sum(col_totals)
    n_samples = np.sum(ab_cts)

    # Mutual information of the two variables; the contingency argument is
    # the joint count distribution [a_classes x b_classes].
    mi = mutual_info_score(None, None, contingency=ab_cts)

    # Expected value of the mutual information.
    emi = expected_mutual_information(ab_cts, n_samples)

    # Entropy of each marginal, combined into a single normalizer.
    h_a = _entropy(a_freq)
    h_b = _entropy(b_freq)
    normalizer = _generalized_average(h_a, h_b, average_method)

    # Keep the denominator at least machine epsilon away from zero while
    # preserving its sign, so the final division never divides by zero.
    eps = np.finfo('float64').eps
    denominator = normalizer - emi
    if denominator < 0:
        denominator = min(denominator, -eps)
    else:
        denominator = max(denominator, eps)

    return (mi - emi) / denominator