Example #1
0
def test_shc_semi_supervised_scoring_data_affinity():
    """Check semi-supervised SHC when scoring_data='affinity'.

    Two scenarios are exercised: fitting on a feature matrix (with
    ``euclidean_distances`` as the affinity callable) and fitting on a
    precomputed affinity matrix.  In both, the scoring callback asserts that
    the matrix it receives is square, and the fitted labels must split into
    four groups of 25 samples.
    """
    expected_sizes = [25, 25, 25, 25]

    # --- Scenario 1: a feature matrix is passed to fit() ---
    X1, y1 = generate_data(supervised=True, affinity=False)

    def _score_on_features(X_affinity, labels_true, labels_pred):
        n_rows, n_cols = X_affinity.shape[0], X_affinity.shape[1]
        assert n_rows == n_cols
        # The matrix handed to the scorer must not have the raw input's shape.
        assert X_affinity.shape != X1.shape
        return b3_f_score(labels_true, labels_pred)

    feature_clusterer = ScipyHierarchicalClustering(
        scoring=_score_on_features,
        scoring_data="affinity",
        affinity=euclidean_distances,
    )
    feature_clusterer.fit(X1, y1)
    assert_array_equal(expected_sizes,
                       np.bincount(feature_clusterer.labels_))

    # --- Scenario 2: an affinity matrix is passed to fit() ---
    X2, y2 = generate_data(supervised=True, affinity=True)

    def _score_on_affinity(X_affinity, labels_true, labels_pred):
        n_rows, n_cols = X_affinity.shape[0], X_affinity.shape[1]
        assert n_rows == n_cols
        # With a precomputed affinity the scorer sees the input's own shape.
        assert X_affinity.shape == X2.shape
        return b3_f_score(labels_true, labels_pred)

    affinity_clusterer = ScipyHierarchicalClustering(
        scoring=_score_on_affinity,
        scoring_data="affinity",
        affinity="precomputed",
    )
    affinity_clusterer.fit(X2, y2)
    assert_array_equal(expected_sizes,
                       np.bincount(affinity_clusterer.labels_))
Example #2
0
def hcluster(X, attrs):
    """Run hierarchical clustering on ``X`` and return scatter plot data.

    Args:
        X: Data passed unchanged to ``ScipyHierarchicalClustering.fit`` and
            to ``scatterplot``.
        attrs: Options object indexed by key.  ``attrs['kNumber']`` is
            converted with ``int`` and used as ``n_clusters``;
            ``attrs['distance']`` is forwarded as the clusterer's ``method``
            argument; ``attrs['affinity']`` is forwarded as its ``affinity``
            argument.

    Returns:
        Whatever ``scatterplot(X, labels, n_clusters)`` returns, where
        ``labels`` is the fitted clusterer's ``labels_`` attribute.

    Side effects:
        The fitted clusterer is handed to ``save_clusterer`` before the
        result is built.
    """
    n_clusters = int(attrs['kNumber'])

    # Named `clusterer` rather than `hcluster` so the local does not shadow
    # this function's own name.
    clusterer = ScipyHierarchicalClustering(method=attrs['distance'],
                                            affinity=attrs['affinity'],
                                            n_clusters=n_clusters)
    clusterer.fit(X)
    labels = clusterer.labels_

    # NOTE(review): a tree export, HClusterTree(clusterer.linkage_).to_dict(),
    # was commented out here.  The nested {'children', 'name', 'value'} dict
    # it would produce is NOT what this function returns.

    save_clusterer(clusterer)
    return scatterplot(X, labels, n_clusters)
Example #3
0
def hcluster(X, attrs):
    """Run hierarchical clustering on ``X`` and return scatter plot data.

    Args:
        X: Data passed unchanged to ``ScipyHierarchicalClustering.fit`` and
            to ``scatterplot``.
        attrs: Options object indexed by key.  ``attrs['kNumber']`` is
            converted with ``int`` and used as ``n_clusters``;
            ``attrs['distance']`` is forwarded as the clusterer's ``method``
            argument; ``attrs['affinity']`` is forwarded as its ``affinity``
            argument.

    Returns:
        Whatever ``scatterplot(X, labels, n_clusters)`` returns, where
        ``labels`` is the fitted clusterer's ``labels_`` attribute.
    """
    n_clusters = int(attrs['kNumber'])

    # Named `clusterer` rather than `hcluster` so the local does not shadow
    # this function's own name.
    clusterer = ScipyHierarchicalClustering(method=attrs['distance'],
                                            affinity=attrs['affinity'],
                                            n_clusters=n_clusters)
    clusterer.fit(X)
    labels = clusterer.labels_

    # NOTE(review): two steps are disabled in this variant:
    #   * the tree export HClusterTree(clusterer.linkage_).to_dict(), whose
    #     nested {'children', 'name', 'value'} dict is NOT what is returned;
    #   * persisting the fitted model via save_clusterer(clusterer).

    return scatterplot(X, labels, n_clusters)
Example #4
0
def test_shc_semi_supervised_scoring_data_none():
    """Check semi-supervised SHC when no scoring_data is given.

    The scoring callback takes only the true and predicted labels.  After
    fitting with labels, the result must split into four groups of 25.
    """
    def _label_only_score(labels_true, labels_pred):
        return b3_f_score(labels_true, labels_pred)

    X, y = generate_data(supervised=True, affinity=False)

    # All 4 clusters are expected to be recovered.
    model = ScipyHierarchicalClustering(scoring=_label_only_score)
    model.fit(X, y)
    cluster_sizes = np.bincount(model.labels_)
    assert_array_equal([25, 25, 25, 25], cluster_sizes)
Example #5
0
def test_shc_semi_supervised_scoring_data_raw():
    """Check semi-supervised SHC when scoring_data='raw'.

    The scoring callback asserts that the matrix it receives has the same
    shape as the data passed to ``fit``.  The fitted labels must split into
    four groups of 25 samples.
    """
    X, y = generate_data(supervised=True, affinity=False)
    input_shape = X.shape

    def _raw_data_score(X_raw, labels_true, labels_pred):
        # The scorer is expected to see data shaped like the fit() input.
        assert X_raw.shape == input_shape
        return b3_f_score(labels_true, labels_pred)

    model = ScipyHierarchicalClustering(
        scoring=_raw_data_score,
        scoring_data="raw",
    )
    model.fit(X, y)
    cluster_sizes = np.bincount(model.labels_)
    assert_array_equal([25, 25, 25, 25], cluster_sizes)