def test_maui_clusters_picks_optimal_k_by_ami():
    """Check that ``cluster`` settles on k=2 when AMI scoring is mocked.

    ``sklearn.metrics.adjusted_mutual_info_score`` is patched with a mock
    that yields 2, 3, 1 on successive calls, so the highest score is the
    one returned by the second call.
    """
    # One canned score per call; the maximum (3) comes second.
    fake_ami = mock.Mock(side_effect=[2, 3, 1])
    sample_ids = [f"sample {i}" for i in range(10)]
    feature_ids = [f"feature {i}" for i in range(20)]

    with mock.patch("sklearn.metrics.adjusted_mutual_info_score", fake_ami):
        model = Maui(n_hidden=[10], n_latent=2, epochs=1)
        # Assign the latent factors and input data directly instead of fitting.
        model.z_ = pd.DataFrame(
            np.random.randn(10, 2), index=sample_ids, columns=["LF1", "LF2"]
        )
        model.x_ = pd.DataFrame(
            np.random.randn(20, 10), index=feature_ids, columns=sample_ids
        )
        labels = pd.Series(np.arange(10), index=model.z_.index)

        # The test expects the second trial of the range to be k=2.
        model.cluster(ami_y=labels, optimal_k_range=[1, 2, 3])

        assert model.optimal_k_ == 2
def test_maui_clusters_picks_optimal_k_by_silhouette():
    """Check that ``cluster`` settles on k=2 when silhouette scoring is mocked.

    ``sklearn.metrics.silhouette_score`` is patched with a mock that yields
    2, 3, 1 on successive calls, so the highest score is the one returned
    by the second call.
    """
    # One canned score per call; the maximum (3) comes second.
    fake_silhouette = mock.Mock(side_effect=[2, 3, 1])
    sample_ids = [f"sample {i}" for i in range(10)]
    feature_ids = [f"feature {i}" for i in range(20)]

    with mock.patch("sklearn.metrics.silhouette_score", fake_silhouette):
        model = Maui(n_hidden=[10], n_latent=2, epochs=1)
        # Assign the latent factors and input data directly instead of fitting.
        model.z_ = pd.DataFrame(
            np.random.randn(10, 2), index=sample_ids, columns=["LF1", "LF2"]
        )
        model.x_ = pd.DataFrame(
            np.random.randn(20, 10), index=feature_ids, columns=sample_ids
        )

        # The test expects the second trial of the range to be k=2.
        model.cluster(optimal_k_method="silhouette", optimal_k_range=[1, 2, 3])

        assert model.optimal_k_ == 2
def test_maui_clusters_picks_optimal_k_with_custom_scoring():
    """Check that ``cluster`` settles on k=2 when given a callable scorer.

    The scorer is a mock that yields 2, 3, 1 on successive calls, so the
    highest score is the one returned by the second call.
    """
    # One canned score per call; the maximum (3) comes second.
    custom_scorer = mock.Mock(side_effect=[2, 3, 1])
    # Mock objects do not expose ``__name__`` unless it is set explicitly;
    # presumably ``cluster`` reads it from the scorer -- TODO confirm.
    custom_scorer.__name__ = "mock_scorer"

    sample_ids = [f"sample {i}" for i in range(10)]
    feature_ids = [f"feature {i}" for i in range(20)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Assign the latent factors and input data directly instead of fitting.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_ids, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=feature_ids, columns=sample_ids
    )

    # The test expects the second trial of the range to be k=2.
    model.cluster(optimal_k_method=custom_scorer, optimal_k_range=[1, 2, 3])

    assert model.optimal_k_ == 2
def test_maui_clusters_with_single_k():
    """Check that ``cluster(5)`` returns one entry per sample in ``z_``."""
    sample_ids = [f"sample {i}" for i in range(10)]
    feature_ids = [f"feature {i}" for i in range(20)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Assign the latent factors and input data directly instead of fitting.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_ids, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=feature_ids, columns=sample_ids
    )

    assignments = model.cluster(5)

    # ``z_`` holds ten samples, so ten assignments are expected.
    assert assignments.shape == (10,)
def test_maui_clusters_only_samples_in_y_index_when_optimizing():
    """Check that optimising k against ``ami_y`` only clusters labelled samples.

    ``z_`` holds ten samples but ``y`` labels only the first six; the index
    of the returned assignments must match the index of ``y``.

    NOTE(review): a later function in this module has the same name, so
    this definition is rebound at import time and pytest collects only the
    later one. Consider renaming or removing one of them.
    """
    # Scoring is not mocked here, so the clustering runs on the random data;
    # seed NumPy so the generated inputs are the same on every run.
    np.random.seed(0)

    maui_model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Assign the latent factors and input data directly instead of fitting.
    maui_model.z_ = pd.DataFrame(
        np.random.randn(10, 2),
        index=[f"sample {i}" for i in range(10)],
        columns=["LF1", "LF2"],
    )
    maui_model.x_ = pd.DataFrame(
        np.random.randn(20, 10),
        index=[f"feature {i}" for i in range(20)],
        columns=[f"sample {i}" for i in range(10)],
    )

    # Labels cover samples 0-5 only; samples 6-9 are left unlabelled.
    y = pd.Series(
        ["a", "a", "a", "b", "b", "b"],
        index=[f"sample {i}" for i in range(6)],
    )

    yhat = maui_model.cluster(ami_y=y, optimal_k_range=[1, 2, 3])

    assert set(yhat.index) == set(y.index)
def test_maui_clusters_only_samples_in_y_index_when_optimizing():
    """Check that optimising k against ``ami_y`` only clusters labelled samples.

    ``z_`` holds ten samples but the labels cover only the first six; the
    index of the returned assignments must match the index of the labels.

    NOTE(review): an earlier function in this module has the same name;
    this later definition is the one that stays bound to the name.
    """
    # Fixed seed so the random inputs are the same on every run.
    np.random.seed(0)

    all_samples = [f"sample {i}" for i in range(10)]
    all_features = [f"feature {i}" for i in range(20)]
    labelled_samples = [f"sample {i}" for i in range(6)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Assign the latent factors and input data directly instead of fitting.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=all_samples, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=all_features, columns=all_samples
    )

    labels = pd.Series(["a", "a", "a", "b", "b", "b"], index=labelled_samples)
    assignments = model.cluster(ami_y=labels, optimal_k_range=[1, 2, 3])

    assert set(assignments.index) == set(labels.index)