def _entropy_scores(cm):
    """Derive three entropy-based scores from a ClusteringMetrics object.

    (Alternative implementation for testing)

    The marginal entropies are taken from ``cm.row_totals`` and
    ``cm.col_totals``; the other two entropy terms are the sums of
    ``fentropy`` over every column and over every row of ``cm``.

    Returns a ``(homogeneity, completeness, nmi_score)`` tuple, where
    ``nmi_score`` is the harmonic mean of the first two values.
    """
    row_marginal = fentropy(cm.row_totals)
    col_marginal = fentropy(cm.col_totals)
    per_col_total = sum(map(fentropy, cm.iter_cols()))
    per_row_total = sum(map(fentropy, cm.iter_rows()))

    # Each '<=' guard serves two purposes: it avoids dividing by a zero
    # marginal entropy, and it clamps the score so it is never negative.
    if row_marginal <= per_col_total:
        homogeneity = 0.0
    else:
        homogeneity = (row_marginal - per_col_total) / row_marginal

    if col_marginal <= per_row_total:
        completeness = 0.0
    else:
        completeness = (col_marginal - per_row_total) / col_marginal

    return homogeneity, completeness, harmonic_mean(homogeneity, completeness)
def test_entropy_of_counts_zero():
    """Entropy of an empty collection of counts is (almost) zero."""
    # The trailing 4 is the precision argument of assert_almost_equal.
    assert_almost_equal(fentropy([]), 0.0, 4)