Beispiel #1
0
def test_maui_clusters_picks_optimal_k_by_ami():
    """Check that ``cluster`` settles on the k with the highest mocked AMI.

    ``sklearn.metrics.adjusted_mutual_info_score`` is patched with a mock
    that returns 2, 3 and 1 on successive calls. Assuming one score call per
    candidate in ``optimal_k_range``, in order, the peak lands on k=2.
    """
    sample_names = [f"sample {i}" for i in range(10)]
    feature_names = [f"feature {i}" for i in range(20)]
    # Highest value (3) is returned on the second call.
    fake_ami = mock.Mock(side_effect=[2, 3, 1])

    with mock.patch("sklearn.metrics.adjusted_mutual_info_score", fake_ami):
        model = Maui(n_hidden=[10], n_latent=2, epochs=1)
        # Set the latent factors and input data directly on the model.
        model.z_ = pd.DataFrame(
            np.random.randn(10, 2), index=sample_names, columns=["LF1", "LF2"]
        )
        model.x_ = pd.DataFrame(
            np.random.randn(20, 10), index=feature_names, columns=sample_names
        )

        reference_labels = pd.Series(np.arange(10), index=model.z_.index)
        candidate_ks = [1, 2, 3]  # second candidate is k=2
        model.cluster(ami_y=reference_labels, optimal_k_range=candidate_ks)

        assert model.optimal_k_ == 2
Beispiel #2
0
def test_maui_clusters_picks_optimal_k_by_silhouette():
    """Check that ``cluster`` settles on the k with the highest mocked silhouette.

    ``sklearn.metrics.silhouette_score`` is patched with a mock that returns
    2, 3 and 1 on successive calls. Assuming one score call per candidate in
    ``optimal_k_range``, in order, the peak lands on k=2.
    """
    sample_names = [f"sample {i}" for i in range(10)]
    feature_names = [f"feature {i}" for i in range(20)]
    # Highest value (3) is returned on the second call.
    fake_silhouette = mock.Mock(side_effect=[2, 3, 1])

    with mock.patch("sklearn.metrics.silhouette_score", fake_silhouette):
        model = Maui(n_hidden=[10], n_latent=2, epochs=1)
        # Set the latent factors and input data directly on the model.
        model.z_ = pd.DataFrame(
            np.random.randn(10, 2), index=sample_names, columns=["LF1", "LF2"]
        )
        model.x_ = pd.DataFrame(
            np.random.randn(20, 10), index=feature_names, columns=sample_names
        )

        candidate_ks = [1, 2, 3]  # second candidate is k=2
        model.cluster(optimal_k_method="silhouette", optimal_k_range=candidate_ks)

        assert model.optimal_k_ == 2
Beispiel #3
0
def test_maui_clusters_picks_optimal_k_with_custom_scoring():
    """Check that ``cluster`` accepts a callable as ``optimal_k_method``.

    The callable is a mock that returns 2, 3 and 1 on successive calls.
    Assuming one score call per candidate in ``optimal_k_range``, in order,
    the best score belongs to k=2.
    """
    # Highest custom score (3) is returned on the second call.
    custom_scorer = mock.Mock(side_effect=[2, 3, 1])
    # Mock objects have no __name__ by default, so one is set explicitly.
    custom_scorer.__name__ = "mock_scorer"

    sample_names = [f"sample {i}" for i in range(10)]
    feature_names = [f"feature {i}" for i in range(20)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Set the latent factors and input data directly on the model.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_names, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=feature_names, columns=sample_names
    )

    candidate_ks = [1, 2, 3]  # second candidate is k=2
    model.cluster(optimal_k_method=custom_scorer, optimal_k_range=candidate_ks)

    assert model.optimal_k_ == 2
Beispiel #4
0
def test_maui_clusters_with_single_k():
    """Check that ``cluster(5)`` yields one assignment per sample.

    Ten samples are placed on the model, so the returned object is expected
    to have shape ``(10,)``.
    """
    sample_names = [f"sample {i}" for i in range(10)]
    feature_names = [f"feature {i}" for i in range(20)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Set the latent factors and input data directly on the model.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_names, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=feature_names, columns=sample_names
    )

    assignments = model.cluster(5)
    assert assignments.shape == (10,)
Beispiel #5
0
def test_maui_clusters_only_samples_in_y_index_when_optimizing():
    """Check that ``cluster`` returns labels only for samples found in ``ami_y``.

    The model holds ten samples, but the reference labels ``y`` cover only
    the first six. The returned assignments must be indexed by exactly those
    six samples.
    """
    # Nothing is mocked here, so seed NumPy's global RNG to make the random
    # fixtures identical on every run.
    np.random.seed(0)

    maui_model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Set the latent factors and input data directly on the model.
    maui_model.z_ = pd.DataFrame(
        np.random.randn(10, 2),
        index=[f'sample {i}' for i in range(10)],
        columns=['LF1', 'LF2'],
    )
    maui_model.x_ = pd.DataFrame(
        np.random.randn(20, 10),
        index=[f'feature {i}' for i in range(20)],
        columns=[f'sample {i}' for i in range(10)],
    )

    # Reference labels for a strict subset (6 of 10) of the samples.
    y = pd.Series(
        ['a', 'a', 'a', 'b', 'b', 'b'],
        index=[f'sample {i}' for i in range(6)],
    )

    yhat = maui_model.cluster(ami_y=y, optimal_k_range=[1, 2, 3])
    assert set(yhat.index) == set(y.index)
def test_maui_clusters_only_samples_in_y_index_when_optimizing():
    """Check that ``cluster`` returns labels only for samples found in ``ami_y``.

    Ten samples are set on the model while the reference labels cover only
    the first six. The returned index must match those six exactly.
    """
    # Fixed seed: the random fixtures below are the same on every run.
    np.random.seed(0)

    sample_names = [f"sample {i}" for i in range(10)]
    feature_names = [f"feature {i}" for i in range(20)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Set the latent factors and input data directly on the model.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_names, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=feature_names, columns=sample_names
    )

    # Labels for the first six samples only: three "a" followed by three "b".
    reference_labels = pd.Series(["a"] * 3 + ["b"] * 3, index=sample_names[:6])

    assignments = model.cluster(ami_y=reference_labels, optimal_k_range=[1, 2, 3])

    expected_samples = set(reference_labels.index)
    assert set(assignments.index) == expected_samples