Beispiel #1
0
    def test_mirac_wrong_args(self):
        """Check that MIRAC rejects invalid constructor arguments.

        Each construction below is expected to raise ``ValueError``.
        """
        x = np.zeros((10, 10))
        # wrong min_cl_n
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', min_cl_n=-0.1)
        # wrong cl_mdl_scale_factor
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', cl_mdl_scale_factor=-0.1)
        # wrong encode type, passed once as a string and once as an int
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', encode_type='1')
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', encode_type=1)
        # wrong dim_reduct_method
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', dim_reduct_method='NONN')

        # hac tree n_leaves different from n_samples: the linkage is built
        # from 6 observations while x has 10 rows
        z = sch.linkage([[0], [5], [6], [8], [9], [12]],
                        method='single', optimal_ordering=True)
        hct = eda.HClustTree(sch.to_tree(z))
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', hac_tree=hct)
Beispiel #2
0
    def test_mirac_dim_reduction(self):
        """Exercise MIRAC with each ``dim_reduct_method``: PCA, t-SNE, UMAP.

        Besides checking that the constructions succeed, this asserts that a
        PCA run and a t-SNE run on the same blobs give equal labels, and that
        constructing MIRAC on an empty sample matrix with PCA raises.
        """
        tx, _ = skdset.make_blobs(n_samples=100, n_features=2,
                                  centers=10, random_state=8927)
        # shift so that the smallest entry is 0
        tx = tx - tx.min()
        sdm = eda.SampleDistanceMatrix(tx, metric='euclidean')
        m = cluster.MIRAC(sdm._x, sdm._d, metric='euclidean', linkage='ward',
                          min_cl_n=35, cl_mdl_scale_factor=1,
                          dim_reduct_method='PCA', verbose=True)
        assert len(m.labs) == 100
        # same data, but with a precomputed tree instead of a distance matrix
        hct = eda.HClustTree.hclust_tree(sdm._d, linkage='ward',
                                         optimal_ordering=True)
        m2 = cluster.MIRAC(sdm._x, hac_tree=hct, min_cl_n=35,
                           cl_mdl_scale_factor=1, encode_type='data',
                           mdl_method=eda.mdl.ZeroIGKdeMdl,
                           dim_reduct_method='t-SNE', verbose=False)
        assert m.labs == m2.labs
        assert m2._sdm._lazy_load_d is None

        tx2, _ = skdset.make_blobs(n_samples=500, n_features=50,
                                   centers=5, cluster_std=15,
                                   random_state=8927)
        tx2 = tx2 - tx2.min()
        cluster.MIRAC(tx2, metric='euclidean', min_cl_n=15,
                      optimal_ordering=False, cl_mdl_scale_factor=0,
                      dim_reduct_method='UMAP', verbose=True)
        # auto to encode distance
        # NOTE(review): nothing below asserts which encode type was chosen;
        # this call is only checked for not raising.
        tx3, _ = skdset.make_blobs(n_samples=100, n_features=101,
                                   cluster_std=0.01, centers=5,
                                   random_state=8927)
        tx3 = tx3 - tx3.min()
        sdm = eda.SampleDistanceMatrix(tx3, metric='euclidean')
        cluster.MIRAC(sdm._x, sdm._d, metric='euclidean', linkage='ward',
                      min_cl_n=5, cl_mdl_scale_factor=1,
                      dim_reduct_method='t-SNE', verbose=True)
        # empty
        sdm = eda.SampleDistanceMatrix([[], []], metric='euclidean')

        # NOTE(review): the concrete exception type is not visible from this
        # test; narrow ``Exception`` once it is confirmed.
        with pytest.raises(Exception):
            cluster.MIRAC(sdm._x, sdm._d, metric='euclidean',
                          linkage='ward',
                          min_cl_n=35, cl_mdl_scale_factor=1,
                          dim_reduct_method='PCA', verbose=True)
Beispiel #3
0
 def test_mirac_rerun(self):
     """Check that ``tune_parameters`` keeps the same ``_sdm._x`` object.

     A MIRAC instance is built with t-SNE dimension reduction; after
     calling ``tune_parameters`` the ``_sdm._x`` attribute must still be
     the identical object (``is``), not merely an equal one.
     """
     blob_x, _ = skdset.make_blobs(n_samples=100, n_features=2,
                                   centers=10, random_state=8927)
     shifted_x = blob_x - blob_x.min()
     mirac_kwargs = dict(
         min_cl_n=35,
         cl_mdl_scale_factor=1,
         encode_type='data',
         mdl_method=eda.mdl.ZeroIGKdeMdl,
         dim_reduct_method='t-SNE',
         verbose=False,
     )
     mirac = cluster.MIRAC(shifted_x, **mirac_kwargs)
     x_before_tuning = mirac._sdm._x
     mirac.tune_parameters(0, 5, 0.1, 1)
     assert x_before_tuning is mirac._sdm._x
Beispiel #4
0
 def test_mirac_run_300x1000(self):
     """Run MIRAC on ``self.x300x1000`` and check the resulting labels.

     Expects 300 labels forming five clusters of 60 samples each, and
     every cluster to map onto a single value of ``self.labs300``.
     """
     mirac_res = cluster.MIRAC(self.x300x1000,
                               metric="correlation",
                               encode_type="distance",
                               min_cl_n=10,
                               min_split_mdl_red_ratio=0.05,
                               nprocs=3)
     # Convert once so the element-wise comparison below does not depend
     # on the container type returned by ``labs``.
     labs = np.asarray(mirac_res.labs)
     assert len(labs) == 300
     ulabs, ulab_cnts = np.unique(labs, return_counts=True)
     np.testing.assert_equal(ulab_cnts, [60] * 5)
     for ulab in ulabs:
         ulab_ind = np.where(labs == ulab)[0]
         # all samples of one MIRAC cluster share one reference label
         assert len(np.unique(self.labs300[ulab_ind])) == 1
Beispiel #5
0
 def test_mirac_run_100x500(self):
     """Run MIRAC on ``self.x100x500`` and check the resulting labels.

     Expects 100 labels forming two clusters of 50 samples each, and
     every cluster to map onto a single value of ``self.labs100``.
     """
     mirac_res = cluster.MIRAC(self.x100x500,
                               metric="correlation",
                               min_cl_n=35,
                               encode_type="data",
                               cl_mdl_scale_factor=1,
                               min_split_mdl_red_ratio=0.1,
                               nprocs=1,
                               verbose=True)
     # Convert once so the element-wise comparison below does not depend
     # on the container type returned by ``labs``.
     labs = np.asarray(mirac_res.labs)
     assert len(labs) == 100
     ulabs, ulab_cnts = np.unique(labs, return_counts=True)
     np.testing.assert_equal(ulab_cnts, [50] * 2)
     for ulab in ulabs:
         ulab_ind = np.where(labs == ulab)[0]
         # all samples of one MIRAC cluster share one reference label
         assert len(np.unique(self.labs100[ulab_ind])) == 1
Beispiel #6
0
    def test_mirac_cover_tests(self):
        """Smoke-test several MIRAC construction paths.

        Covers an all-zero matrix (quiet and verbose), a precomputed
        distance matrix versus a precomputed tree on the same blobs, a run
        with ``optimal_ordering=False``, the automatically selected
        ``'distance'`` encode type, and an empty sample matrix.
        """
        x = np.zeros((10, 10))
        cluster.MIRAC(x, metric='euclidean', min_cl_n=25)
        cluster.MIRAC(x, metric='euclidean', min_cl_n=25, verbose=True)

        tx, _ = skdset.make_blobs(n_samples=100, n_features=2,
                                  centers=10, random_state=8927)
        # make all non-neg
        tx = tx - tx.min()
        sdm = eda.SampleDistanceMatrix(tx, metric='euclidean')
        m = cluster.MIRAC(sdm._x, sdm._d, metric='euclidean', linkage='ward',
                          min_cl_n=35, cl_mdl_scale_factor=1, verbose=True)
        assert len(m.labs) == 100
        # same data, but with a precomputed tree instead of a distance matrix
        hct = eda.HClustTree.hclust_tree(sdm._d, linkage='ward',
                                         optimal_ordering=True)
        m2 = cluster.MIRAC(sdm._x, hac_tree=hct, min_cl_n=35,
                           cl_mdl_scale_factor=1, encode_type='data',
                           mdl_method=eda.mdl.ZeroIGKdeMdl, verbose=False)
        assert m.labs == m2.labs
        assert m2._sdm._lazy_load_d is None

        tx2, _ = skdset.make_blobs(n_samples=500, n_features=5,
                                   centers=5, cluster_std=1,
                                   random_state=8927)
        tx2 = tx2 - tx2.min()
        cluster.MIRAC(tx2, metric='euclidean', min_cl_n=15,
                      optimal_ordering=False, cl_mdl_scale_factor=0,
                      verbose=True)
        # auto to encode distance
        tx3, _ = skdset.make_blobs(n_samples=100, n_features=101,
                                   cluster_std=0.01, centers=5,
                                   random_state=8927)
        tx3 = tx3 - tx3.min()
        sdm = eda.SampleDistanceMatrix(tx3, metric='euclidean')
        m = cluster.MIRAC(sdm._x, sdm._d, metric='euclidean', linkage='ward',
                          min_cl_n=5, cl_mdl_scale_factor=1, verbose=True)
        assert m._encode_type == 'distance'
        # empty: only checked for not raising
        sdm = eda.SampleDistanceMatrix([[], []], metric='euclidean')
        cluster.MIRAC(sdm._x, sdm._d, metric='euclidean', linkage='ward',
                      min_cl_n=35, cl_mdl_scale_factor=1, verbose=True)
Beispiel #7
0
    def test_mirac_dmat_heatmap(self):
        """Check ``MIRAC.dmat_heatmap`` on empty, invalid and valid inputs.

        A MIRAC object is built on empty data and then has its ``_hac_tree``,
        ``_sdm`` and ``_labs`` attributes overwritten directly to set up each
        scenario. The figure from the last call is returned.
        NOTE(review): the returned figure is presumably consumed by a
        decorator outside this view -- confirm.
        """
        # create a dummy MIRAC object
        empty_sdm = eda.SampleDistanceMatrix([[], []], metric='euclidean')
        mirac = cluster.MIRAC(empty_sdm._x, empty_sdm._d,
                              metric='euclidean', linkage='ward',
                              min_cl_n=35, cl_mdl_scale_factor=1,
                              verbose=True)
        # empty tree
        mirac._hac_tree = eda.HClustTree(None)
        assert mirac.dmat_heatmap() is None

        # normal tree and d
        points_5x2 = [[0, 0],
                      [100, 100],
                      [1, 1],
                      [101, 101],
                      [80, 80]]
        sdm_5x2 = eda.SampleDistanceMatrix(points_5x2, metric='euclidean')
        # This tree should be
        #   _______|_____
        #   |       ____|___
        # __|___    |    __|___
        # |    |    |    |    |
        # 0    2    4    1    3
        # Leaves are in optimal order.
        tree_5x2 = eda.HClustTree.hclust_tree(sdm_5x2.d, linkage='auto')
        mirac._hac_tree = tree_5x2
        mirac._sdm = sdm_5x2

        # Leaf order is [0, 2, 4, 1, 3] throughout; the trailing comment on
        # each entry gives the labels rearranged into that leaf order.
        # invalid labels
        # NOTE(review): in every invalid case one label ends up on
        # non-adjacent leaves -- presumably what triggers the ValueError;
        # confirm against dmat_heatmap.
        rejected_labs = (
            [1, 3, 1, 2, 2],            # leaf labs: [1, 1, 2, 3, 2]
            ['1', '3', '1', '2', '2'],  # same layout, as strings
            [5, 1, 1, 2, 2],            # leaf labs: [5, 1, 2, 1, 2]
            [5, 3, 1, 2, 2],            # leaf labs: [5, 1, 2, 3, 2]
            [5, 3, 10, 5, 20],          # leaf labs: [5, 10, 20, 3, 5]
        )
        for labs in rejected_labs:
            mirac._labs = labs
            with pytest.raises(ValueError):
                mirac.dmat_heatmap()

        # valid labels
        accepted_labs = (
            [2, 1, 2, 0, 1],            # leaf labs: [2, 2, 1, 1, 0]
            ['2', '1', '2', '0', '1'],  # same layout, as strings
            [0, 1, 2, 3, 4],            # leaf labs: [0, 2, 4, 1, 3]
            [0]*5,                      # leaf labs: [0, 0, 0, 0, 0]
        )
        for labs in accepted_labs:
            mirac._labs = labs
            mirac.dmat_heatmap()

        # have a figure
        # leaf order: [ 0 ,  2 ,  4 ,  1 ,  3 ]
        # leaf labs : ['b', 'l', 'r', 'e', 'o']
        mirac._labs = ['beginning', 'end', 'l', 'out', 'r']
        shown_labels = ['beginning', 'end', 'l', 'r']
        fig = mirac.dmat_heatmap(selected_labels=shown_labels)
        assert fig is not None
        return fig