Ejemplo n.º 1
0
def test_maui_merges_latent_factors():
    """Merging similar latent factors collapses duplicated columns of ``z_``.

    The fixture holds 9 latent factors for 11 samples. Columns 0, 1 and 2
    are identical to each other, and so are columns 3 and 7, so the test
    expects 6 columns after merging with the euclidean metric.
    """
    sample_ids = [f'sample {i}' for i in range(11)]
    factor_ids = [f'LF{i}' for i in range(9)]
    latent_values = [
        [1, 1, 1, 0, 0, 0, 1, 0, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 1, 1, 1, 0],
    ]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        latent_values, index=sample_ids, columns=factor_ids, dtype=float
    )

    merged = model.merge_similar_latent_factors(distance_metric='euclidean')

    # 9 factors minus two merged away from {0,1,2} and one from {3,7}.
    assert merged.shape[1] == 6
    assert '0_1_2' in merged.columns
    assert '3_7' in merged.columns
Ejemplo n.º 2
0
def test_select_clinical_factors():
    """``select_clinical_factors`` keeps LF0 and leaves out LF5.

    LF0-LF2 are 1 for the first six samples and 0 for the last five, which
    matches the split between short and long survival durations below; the
    remaining factors do not follow that split.
    """
    sample_ids = [f'sample {i}' for i in range(11)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        [
            [1, 1, 1, 0, 0, 0, 1, 0, 1],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 0, 0, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 1, 1, 1, 1],
        ],
        index=sample_ids,
        columns=[f'LF{i}' for i in range(9)],
    )

    # The first six samples have short durations, the last five long ones;
    # every event is marked as observed.
    survival = pd.DataFrame(
        {
            'duration': [1, 2, 3, 4, 5, 6, 1000, 2000, 3000, 4000, 5000],
            'observed': [True] * 11,
        },
        index=sample_ids,
    )

    selected = model.select_clinical_factors(survival, cox_penalizer=1)
    assert 'LF0' in selected.columns
    assert 'LF5' not in selected.columns
Ejemplo n.º 3
0
def test_maui_computes_harrells_c():
    """``c_index`` on the fixture returns values close to ``[.8, .8]``.

    LF0-LF2 are 1 for the first six samples and 0 for the last five, which
    matches the split between short and long survival durations below.
    The call uses 2 cross-validation folds and clinical factors only.
    """
    sample_ids = [f'sample {i}' for i in range(11)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        [
            [1, 1, 1, 0, 0, 0, 1, 0, 1],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 0, 0, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 1, 1, 1, 1],
        ],
        index=sample_ids,
        columns=[f'LF{i}' for i in range(9)],
    )

    # The first six samples have short durations, the last five long ones;
    # every event is marked as observed.
    survival = pd.DataFrame(
        {
            'duration': [1, 2, 3, 4, 5, 6, 1000, 2000, 3000, 4000, 5000],
            'observed': [True] * 11,
        },
        index=sample_ids,
    )

    penalty_grid = [.1, 1, 10, 100, 1000, 10000]
    cs = model.c_index(
        survival,
        clinical_only=True,
        duration_column='duration',
        observed_column='observed',
        cox_penalties=penalty_grid,
        cv_folds=2,
        sel_clin_alpha=.05,
        sel_clin_penalty=1,
    )
    print(cs)
    assert np.allclose(cs, [.8, .8])
Ejemplo n.º 4
0
def test_maui_merge_latent_factors_complains_if_unknown_merge_by():
    """``merge_similar_latent_factors`` raises for an unknown ``distance_in``.

    The latent-factor fixture is only there so the model has a ``z_`` to
    work on; the call is expected to fail because ``"xxx"`` is passed as
    ``distance_in``.
    """
    maui_model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    maui_model.z_ = pd.DataFrame(
        [
            [1, 1, 1, 0, 0, 0, 1, 0, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 0, 0, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 1, 1, 1, 0],
        ],
        index=[f"sample {i}" for i in range(11)],
        columns=[f"LF{i}" for i in range(9)],
        dtype=float,
    )

    # NOTE(review): the concrete exception type raised by Maui is not visible
    # here, so the broad ``Exception`` is kept; narrow it once confirmed.
    with pytest.raises(Exception):
        # The return value is irrelevant: the call itself must raise.
        maui_model.merge_similar_latent_factors(
            distance_in="xxx", distance_metric="euclidean"
        )
Ejemplo n.º 5
0
def test_maui_clusters_picks_optimal_k_by_ami():
    """``cluster`` stores in ``optimal_k_`` the k with the highest AMI.

    ``sklearn.metrics.adjusted_mutual_info_score`` is patched to return
    2, 3 and 1 on successive calls, so the best score is on the second
    call; with ``optimal_k_range=[1, 2, 3]`` the test expects k=2.
    """
    sample_ids = [f"sample {i}" for i in range(10)]
    fake_ami = mock.Mock(side_effect=[2, 3, 1])  # highest score on 2nd call

    with mock.patch("sklearn.metrics.adjusted_mutual_info_score", fake_ami):
        model = Maui(n_hidden=[10], n_latent=2, epochs=1)
        model.z_ = pd.DataFrame(
            np.random.randn(10, 2), index=sample_ids, columns=["LF1", "LF2"]
        )
        model.x_ = pd.DataFrame(
            np.random.randn(20, 10),
            index=[f"feature {i}" for i in range(20)],
            columns=sample_ids,
        )

        reference_labels = pd.Series(np.arange(10), index=model.z_.index)

        # The second entry of the range is k=2.
        model.cluster(ami_y=reference_labels, optimal_k_range=[1, 2, 3])

        assert model.optimal_k_ == 2
Ejemplo n.º 6
0
def test_maui_merges_latent_factors_by_w():
    """Merging with ``distance_in="w"`` groups factors into four columns.

    With the ``z_`` and single-feature ``x_`` fixtures below, the test
    expects the merged groups 0_1_2, 3_6_7 and 4_5 and four columns in
    total.
    """
    sample_ids = [f"sample {i}" for i in range(11)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        [
            [1, 1, 1, 0, 0, 0, 1, 0, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 0, 0, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 1, 1, 1, 0],
        ],
        index=sample_ids,
        columns=[f"LF{i}" for i in range(9)],
        dtype=float,
    )
    # One feature: 1 for the first six samples, 0 for the last five.
    model.x_ = pd.DataFrame(
        [[1]] * 6 + [[0]] * 5,
        index=sample_ids,
        columns=["Feature 1"],
        dtype=float,
    )

    merged = model.merge_similar_latent_factors(
        distance_in="w", distance_metric="euclidean"
    )

    assert merged.shape[1] == 4
    for expected_name in ("0_1_2", "3_6_7", "4_5"):
        assert expected_name in merged.columns
Ejemplo n.º 7
0
def test_maui_drops_unexplanatody_factors_by_r2():
    """``drop_unexplanatory_factors`` leaves 8 of the 9 latent factors.

    LF8 is all zeros in the fixture; presumably it is the factor that
    falls below the R2 threshold and gets dropped (the test itself only
    checks the resulting column count).
    """
    sample_ids = [f"sample {i}" for i in range(11)]
    factor_ids = [f"LF{i}" for i in range(9)]
    latent_values = [
        [1, 1, 1, 0, 0, 0, 1, 0, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 1, 1, 1, 0],
    ]
    # One feature: 1 for the first six samples, 0 for the last five.
    feature_values = [[1]] * 6 + [[0]] * 5

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        latent_values, index=sample_ids, columns=factor_ids, dtype=float
    )
    model.x_ = pd.DataFrame(
        feature_values, index=sample_ids, columns=["Feature 1"], dtype=float
    )

    filtered = model.drop_unexplanatory_factors()

    assert filtered.shape[1] == 8
Ejemplo n.º 8
0
def test_maui_clusters_with_single_k():
    """``cluster(5)`` returns one entry per sample (shape ``(10,)``)."""
    sample_ids = [f'sample {i}' for i in range(10)]
    feature_ids = [f'feature {i}' for i in range(20)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    # Random fixtures: 10 samples x 2 latent factors, 20 features x 10 samples.
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_ids, columns=['LF1', 'LF2']
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10), index=feature_ids, columns=sample_ids
    )

    labels = model.cluster(5)
    assert labels.shape == (10,)
Ejemplo n.º 9
0
def test_maui_clusters_picks_optimal_k_by_silhouette():
    """``cluster`` stores in ``optimal_k_`` the k with the best silhouette.

    ``sklearn.metrics.silhouette_score`` is patched to return 2, 3 and 1
    on successive calls, so the best score is on the second call; with
    ``optimal_k_range=[1,2,3]`` the test expects k=2.
    """
    sample_ids = [f'sample {i}' for i in range(10)]
    fake_silhouette = mock.Mock(side_effect=[2, 3, 1])  # best on 2nd call

    with mock.patch('sklearn.metrics.silhouette_score', fake_silhouette):
        model = Maui(n_hidden=[10], n_latent=2, epochs=1)
        model.z_ = pd.DataFrame(
            np.random.randn(10, 2), index=sample_ids, columns=['LF1', 'LF2']
        )
        model.x_ = pd.DataFrame(
            np.random.randn(20, 10),
            index=[f'feature {i}' for i in range(20)],
            columns=sample_ids,
        )

        # The second entry of the range is k=2.
        model.cluster(optimal_k_method='silhouette', optimal_k_range=[1, 2, 3])

        assert model.optimal_k_ == 2
Ejemplo n.º 10
0
def test_maui_clusters_only_samples_in_y_index_when_optimizing():
    """``cluster`` labels only the samples present in ``ami_y``'s index.

    ``z_`` holds 10 samples but ``y`` covers only the first 6; the returned
    labels are expected to be indexed by exactly those 6 samples.
    """
    # Fix the global NumPy seed so the random fixture is reproducible
    # from run to run.
    np.random.seed(0)
    maui_model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    maui_model.z_ = pd.DataFrame(
        np.random.randn(10, 2),
        index=[f'sample {i}' for i in range(10)],
        columns=['LF1', 'LF2'],
    )
    maui_model.x_ = pd.DataFrame(
        np.random.randn(20, 10),
        index=[f'feature {i}' for i in range(20)],
        columns=[f'sample {i}' for i in range(10)],
    )

    # Reference labels for a subset (first 6) of the samples only.
    y = pd.Series(
        ['a', 'a', 'a', 'b', 'b', 'b'],
        index=[f'sample {i}' for i in range(6)],
    )

    yhat = maui_model.cluster(ami_y=y, optimal_k_range=[1, 2, 3])
    assert set(yhat.index) == set(y.index)
Ejemplo n.º 11
0
def test_maui_clusters_picks_optimal_k_with_custom_scoring():
    """``cluster`` accepts a callable scorer and picks the best-scoring k.

    The mock scorer returns 2, 3 and 1 on successive calls, so the best
    score is on the second call; with ``optimal_k_range=[1,2,3]`` the test
    expects k=2.
    """
    sample_ids = [f'sample {i}' for i in range(10)]

    custom_scorer = mock.Mock(side_effect=[2, 3, 1])  # best score on 2nd call
    custom_scorer.__name__ = 'mock_scorer'

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=sample_ids, columns=['LF1', 'LF2']
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10),
        index=[f'feature {i}' for i in range(20)],
        columns=sample_ids,
    )

    # The second entry of the range is k=2.
    model.cluster(optimal_k_method=custom_scorer, optimal_k_range=[1, 2, 3])

    assert model.optimal_k_ == 2
Ejemplo n.º 12
0
def test_maui_merges_latent_factors_by_w():
    """Merging with ``distance_in='w'`` groups factors into four columns.

    With the ``z_`` and single-feature ``x_`` fixtures below, the test
    expects the merged groups 0_1_2, 3_6_7 and 4_5 and four columns in
    total.
    """
    sample_ids = [f'sample {i}' for i in range(11)]
    factor_ids = [f'LF{i}' for i in range(9)]
    latent_values = [
        [1, 1, 1, 0, 0, 0, 1, 0, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 1, 1, 1, 0],
    ]
    # One feature: 1 for the first six samples, 0 for the last five.
    feature_values = [[v] for v in (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        latent_values, index=sample_ids, columns=factor_ids, dtype=float
    )
    model.x_ = pd.DataFrame(
        feature_values, index=sample_ids, columns=['Feature 1'], dtype=float
    )

    merged = model.merge_similar_latent_factors(
        distance_in='w', distance_metric='euclidean'
    )

    assert merged.shape[1] == 4
    assert '0_1_2' in merged.columns
    assert '3_6_7' in merged.columns
    assert '4_5' in merged.columns
Ejemplo n.º 13
0
def test_maui_clusters_only_samples_in_y_index_when_optimizing():
    """``cluster`` labels only the samples present in ``ami_y``'s index.

    ``z_`` holds 10 samples but the reference labels cover only the first
    6; the returned labels are expected to be indexed by exactly those 6.
    """
    all_samples = [f"sample {i}" for i in range(10)]
    labelled_samples = [f"sample {i}" for i in range(6)]

    np.random.seed(0)  # fixed seed for the random fixture below
    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        np.random.randn(10, 2), index=all_samples, columns=["LF1", "LF2"]
    )
    model.x_ = pd.DataFrame(
        np.random.randn(20, 10),
        index=[f"feature {i}" for i in range(20)],
        columns=all_samples,
    )

    reference = pd.Series(
        ["a", "a", "a", "b", "b", "b"], index=labelled_samples
    )

    predicted = model.cluster(ami_y=reference, optimal_k_range=[1, 2, 3])
    assert set(predicted.index) == set(reference.index)
Ejemplo n.º 14
0
def test_maui_computes_roc_and_auc():
    """``compute_roc`` / ``compute_auc`` return what they store on the model.

    The ROC result is expected to equal ``roc_curves_`` and to contain an
    entry per class label plus a "mean" entry; the AUC result is expected
    to equal ``aucs_``.
    """
    sample_ids = [f'sample {i}' for i in range(6)]
    factor_ids = [f'LF{i}' for i in range(9)]
    latent_values = [
        [0, 1, 1, 1, 0, 1, 1, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 1, 0],
        [1, 0, 1, 0, 0, 0, 1, 1, 0],
        [1, 0, 0, 1, 0, 0, 1, 1, 0],
        [1, 0, 0, 0, 1, 1, 1, 1, 0],
        [1, 1, 1, 0, 0, 0, 1, 1, 1],
    ]

    model = Maui(n_hidden=[10], n_latent=2, epochs=1)
    model.z_ = pd.DataFrame(
        latent_values, index=sample_ids, columns=factor_ids
    )
    # Three classes, two samples each.
    labels = pd.Series(['a', 'b', 'a', 'c', 'b', 'c'], index=model.z_.index)

    rocs = model.compute_roc(labels, cv_folds=2)
    assert rocs == model.roc_curves_
    for key in ('a', 'b', 'c', "mean"):
        assert key in rocs

    aucs = model.compute_auc(labels, cv_folds=2)
    assert aucs == model.aucs_
Ejemplo n.º 15
0
def test_maui_computes_roc_and_auc():
    """``compute_roc`` / ``compute_auc`` return what they store on the model.

    The ROC result is expected to equal ``roc_curves_`` and to contain an
    entry per class label plus a "mean" entry; the AUC result is expected
    to equal ``aucs_``.
    """
    model = Maui(n_hidden=[10], n_latent=2, epochs=1)

    latent_frame = pd.DataFrame(
        [
            [0, 1, 1, 1, 0, 1, 1, 0, 0],
            [1, 0, 0, 0, 0, 0, 1, 1, 0],
            [1, 0, 1, 0, 0, 0, 1, 1, 0],
            [1, 0, 0, 1, 0, 0, 1, 1, 0],
            [1, 0, 0, 0, 1, 1, 1, 1, 0],
            [1, 1, 1, 0, 0, 0, 1, 1, 1],
        ],
        index=[f"sample {i}" for i in range(6)],
        columns=[f"LF{i}" for i in range(9)],
    )
    model.z_ = latent_frame

    # Three classes, two samples each.
    class_labels = pd.Series(
        ["a", "b", "a", "c", "b", "c"], index=model.z_.index
    )

    roc_result = model.compute_roc(class_labels, cv_folds=2)
    assert roc_result == model.roc_curves_
    assert "a" in roc_result
    assert "b" in roc_result
    assert "c" in roc_result
    assert "mean" in roc_result

    auc_result = model.compute_auc(class_labels, cv_folds=2)
    assert auc_result == model.aucs_