Exemple #1
0
def test_gstat_optics():
    """Check that GapStatClustering rejects OPTICS as its base clusterer
    by raising AttributeError at construction time.
    """
    # OPTICS is incompatible with GapStatClustering: it does not let the
    # caller specify the number of clusters
    with pytest.raises(AttributeError):
        unsupported = OPTICS()
        gapstat.GapStatClustering(base_clusterer=unsupported)
Exemple #2
0
def test_gstat_meanshift():
    """Check that GapStatClustering rejects MeanShift as its base clusterer
    by raising AttributeError at construction time.
    """
    # MeanShift is incompatible with GapStatClustering: it does not let the
    # caller specify the number of clusters
    with pytest.raises(AttributeError):
        unsupported = MeanShift()
        gapstat.GapStatClustering(base_clusterer=unsupported)
Exemple #3
0
def test_gstat_dbscan():
    """Check that GapStatClustering rejects DBSCAN as its base clusterer
    by raising AttributeError at construction time.
    """
    # DBSCAN is incompatible with GapStatClustering: it does not let the
    # caller specify the number of clusters
    with pytest.raises(AttributeError):
        unsupported = DBSCAN()
        gapstat.GapStatClustering(base_clusterer=unsupported)
Exemple #4
0
def test_gstat_affinitypropagation():
    """Check that GapStatClustering rejects AffinityPropagation as its
    base clusterer by raising AttributeError at construction time.
    """
    # AffinityPropagation is incompatible with GapStatClustering: it does
    # not let the caller specify the number of clusters
    with pytest.raises(AttributeError):
        unsupported = AffinityPropagation()
        gapstat.GapStatClustering(base_clusterer=unsupported)
Exemple #5
0
def _run_transform_test(base_clusterer=None):
    """Exercise GapStatClustering.transform() and
    GapStatClustering.fit_transform() with the given base clusterer.

    The checks adapt to the base clusterer: when it lacks ``transform`` or
    ``fit_transform``, the corresponding GapStatClustering call is expected
    to raise AttributeError instead of being exercised.
    ``base_clusterer`` is forwarded unchanged to GapStatClustering.
    """
    # build the data set; K is the cluster count the checks below expect
    T, _, K = _quad4(center=[0, 0], offset1=[0.5, 0.5], offset2=[2, 2])
    n_samples = T.shape[0]

    # wrap the base clusterer and read back the instance the wrapper holds
    gstat = gapstat.GapStatClustering(base_clusterer=base_clusterer)
    estimator = gstat.base_clusterer

    # transform() before fit(): NotFittedError when the base clusterer
    # supports transform(), AttributeError when it does not
    if hasattr(estimator, 'transform'):
        transform_error = NotFittedError
    else:
        transform_error = AttributeError
    with pytest.raises(transform_error):
        gstat.transform(T)

    # fit_transform() with max_k too small: NotFittedError when supported;
    # an unsupported fit_transform() raises AttributeError (max_k untouched)
    if hasattr(estimator, 'fit_transform'):
        gstat.set_params(max_k=K-1)
        fit_transform_error = NotFittedError
    else:
        fit_transform_error = AttributeError
    with pytest.raises(fit_transform_error):
        gstat.fit_transform(T)

    # expected number of columns in the transformed data:
    # the number of subclusters for birch, the number of clusters otherwise
    # NOTE(review): for Birch this reads subcluster_centers_ before the
    # successful fit_transform() below -- confirm that is the intended state
    n_features = (len(estimator.subcluster_centers_)
                  if isinstance(estimator, Birch) else K)

    # fit_transform() with a sufficient max_k, if supported
    if hasattr(estimator, 'fit_transform'):
        gstat.set_params(max_k=K)
        fitted_output = gstat.fit_transform(T)
        _check_transformed(fitted_output, n_samples, n_features)

    # transform() after fitting, if supported
    if hasattr(estimator, 'transform'):
        transformed_output = gstat.transform(T)
        _check_transformed(transformed_output, n_samples, n_features)
Exemple #6
0
def test_gstat_featureagglomeration():
    """Check that GapStatClustering rejects FeatureAgglomeration as its
    base clusterer by raising AttributeError at construction time.
    """
    # FeatureAgglomeration is incompatible with GapStatClustering:
    # it transforms the data set before clustering, which breaks some of
    # the gapstat logic.
    # The alternative is to use AgglomerativeClustering and transform the
    # data set before fitting it.
    with pytest.raises(AttributeError):
        unsupported = FeatureAgglomeration()
        gapstat.GapStatClustering(base_clusterer=unsupported)
Exemple #7
0
def _run_fit_test(base_clusterer=None):
    """Test GapStatClustering.fit() method using the specified base clusterer.

    ``base_clusterer`` is forwarded unchanged to GapStatClustering.
    The test first expects fit() to raise NotFittedError when ``max_k`` is
    set below the cluster count of the test data, then expects a successful
    fit once ``max_k`` equals that count.
    """
    # construct test data; K is the cluster count the checks below expect
    T, _, K = _quad4(center=[0, 0], offset1=[0.5, 0.5], offset2=[2, 2])
    n_samples = T.shape[0]

    # create gapstat clusterer with base clusterer
    gstat = gapstat.GapStatClustering(base_clusterer=base_clusterer)

    # test when max_k is too small
    gstat.set_params(max_k=K-1)
    with pytest.raises(NotFittedError):
        gstat.fit(T)

    # test fit(); the object returned by fit() is the one whose fitted
    # attributes are checked
    gstat.set_params(max_k=K)
    gstat = gstat.fit(T)
    _check_labels(gstat.labels_, gstat.n_clusters_, n_samples, K)
Exemple #8
0
def _run_predict_test(base_clusterer=None):
    """Exercise GapStatClustering.predict() and
    GapStatClustering.fit_predict() with the given base clusterer.

    predict() is only exercised when the base clusterer has a ``predict``
    attribute; otherwise the call is expected to raise AttributeError.
    ``base_clusterer`` is forwarded unchanged to GapStatClustering.
    """
    # build the data set; K is the cluster count the checks below expect
    T, _, K = _quad4(center=[0, 0], offset1=[0.5, 0.5], offset2=[2, 2])
    n_samples = T.shape[0]

    # wrap the base clusterer and read back the instance the wrapper holds
    gstat = gapstat.GapStatClustering(base_clusterer=base_clusterer)
    estimator = gstat.base_clusterer

    # predict() before fit(): NotFittedError when the base clusterer
    # supports predict(), AttributeError when it does not
    predict_error = (NotFittedError if hasattr(estimator, 'predict')
                     else AttributeError)
    with pytest.raises(predict_error):
        gstat.predict(T)

    # fit_predict() must fail while max_k is too small
    gstat.set_params(max_k=K-1)
    with pytest.raises(NotFittedError):
        gstat.fit_predict(T)

    # fit_predict() with a sufficient max_k: check both the returned labels
    # and the labels stored on the clusterer
    gstat.set_params(max_k=K)
    fit_labels = gstat.fit_predict(T)
    _check_labels(fit_labels, gstat.n_clusters_, n_samples, K)
    _check_labels(gstat.labels_, gstat.n_clusters_, n_samples, K)

    # nothing more to verify when the base clusterer cannot predict
    if not hasattr(estimator, 'predict'):
        return

    # predict() after fitting: returned and stored labels are both checked
    new_labels = gstat.predict(T)
    _check_labels(new_labels, gstat.n_clusters_, n_samples, K)
    _check_labels(gstat.labels_, gstat.n_clusters_, n_samples, K)