def test_distance_mdl_computation_mp(self):
    """Check per-label MDLs of a distance-encoded model run with nprocs=2.

    A data-encoded model is first exercised through ``no_lab_mdl(nprocs=2)``
    without inspecting its result.  For the distance-encoded model, the
    ``lab_mdl(nprocs=2)`` result must list the expected per-label sample
    indices, its total must equal the sum of the per-label MDLs, and each
    per-label MDL minus ``cluster_mdl * label_size / 50`` must match the
    ``no_lab_mdl`` of a model built from that label's rows alone.
    """
    shared_kw = dict(labs=self.labs50, mdl_method=eda.mdl.GKdeMdl,
                     metric="euclidean")

    # Data encoding: only run for coverage, the value is discarded.
    eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, encode_type="data", **shared_kw).no_lab_mdl(nprocs=2)

    dist_slcs = eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, encode_type="distance", **shared_kw)
    # Run for coverage as well; the value is not asserted on.
    dist_slcs.no_lab_mdl(nprocs=2)

    # The returned count list is ignored in favour of hard-coded sizes.
    total_mdl, members, _, per_lab_mdls, cluster_mdl = dist_slcs.lab_mdl(
        nprocs=2)
    assert members == [list(range(10)), list(range(10, 45)),
                       list(range(45, 50))]
    assert total_mdl == np.sum(per_lab_mdls)

    expected_sizes = [10, 35, 5]
    for i, inds in enumerate(members):
        sub_slcs = eda.MDLSingleLabelClassifiedSamples(
            dist_slcs._x[inds].copy(),
            labs=[self.labs50[j] for j in inds],
            metric="euclidean", encode_type="distance",
            mdl_method=eda.mdl.GKdeMdl)
        sub_no_lab_mdl = sub_slcs.no_lab_mdl(nprocs=2)
        np.testing.assert_allclose(
            sub_no_lab_mdl,
            per_lab_mdls[i] - cluster_mdl * expected_sizes[i] / 50)
def test_mdl_method(self):
    """Check that ``mdl_method`` is stored and used, for every known method.

    The ZeroIGKdeMdl and GKdeMdl models must keep the class they were given
    in ``_mdl_method`` and must produce different ``no_lab_mdl`` values.
    Then, for each entry of ``eda.MDL_METHODS``, the ``lab_mdl`` result is
    checked the same way as in the plain computation test: expected member
    indices, total equal to the sum of per-label MDLs, and per-label MDLs
    consistent with models built from each label's rows alone.
    """
    def build(x, labs, method):
        # All models in this test share the euclidean metric.
        return eda.MDLSingleLabelClassifiedSamples(
            x, labs=labs, mdl_method=method, metric="euclidean")

    zigk_slcs = build(self.x50x5, self.labs50, eda.mdl.ZeroIGKdeMdl)
    gk_slcs = build(self.x50x5, self.labs50, eda.mdl.GKdeMdl)
    assert gk_slcs._mdl_method is eda.mdl.GKdeMdl
    assert zigk_slcs._mdl_method is eda.mdl.ZeroIGKdeMdl
    assert zigk_slcs.no_lab_mdl() != gk_slcs.no_lab_mdl()

    expected_sizes = [10, 35, 5]
    for method in eda.MDL_METHODS:
        full_slcs = build(self.x50x5, self.labs50, method)
        # The returned count list is ignored in favour of expected_sizes.
        total_mdl, members, _, per_lab_mdls, cluster_mdl = (
            full_slcs.lab_mdl())
        assert members == [list(range(10)), list(range(10, 45)),
                           list(range(45, 50))]
        assert total_mdl == np.sum(per_lab_mdls)

        for i, inds in enumerate(members):
            sub_slcs = build(self.x50x5[inds, :],
                             [self.labs50[j] for j in inds],
                             method)
            np.testing.assert_allclose(
                sub_slcs.no_lab_mdl(),
                per_lab_mdls[i] - cluster_mdl * expected_sizes[i] / 50)
def test_auto_param(self):
    """Smoke-test construction with ``encode_type="auto"``, no mdl_method.

    Nothing is asserted; the test passes as long as the constructor accepts
    each input: the 50x5 fixture, a 100x101 all-zero matrix, a 100x100
    all-one matrix, and two empty samples.
    """
    cases = [
        (self.x50x5, self.labs50),
        (np.zeros((100, 101)), [0] * 100),
        (np.ones((100, 100)), [0] * 100),
        ([[], []], [0] * 2),
    ]
    for x, labs in cases:
        eda.MDLSingleLabelClassifiedSamples(
            x, labs=labs, encode_type="auto", mdl_method=None,
            metric="euclidean")
def test_wrong_encode_type(self):
    """Invalid ``encode_type`` values must lead to a ValueError.

    For each of ``"123"``, ``1`` and ``None``, the error has to be raised
    either by the constructor or by the following ``no_lab_mdl()`` call.
    """
    for bad_encode_type in ("123", 1, None):
        with pytest.raises(ValueError):
            eda.MDLSingleLabelClassifiedSamples(
                self.x50x5, labs=self.labs50,
                encode_type=bad_encode_type,
                metric="euclidean").no_lab_mdl()
def test_encode_mdl(self):
    """Exercise ``encode`` with invalid and valid inputs.

    Invalid inputs (a matrix with 3 columns, a 1-D array, and a 1-D array
    combined with ``col_summary_func`` of ``1`` or ``None``) must raise
    ValueError.  For valid inputs, the single-process and ``nprocs=2``
    results must agree, and models that differ in ``mdl_method``, training
    data, ``non_zero_only`` or ``encode_type`` must give different results.
    """
    mdl_slcs = eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, labs=self.labs50, metric="euclidean")

    # wrong dimensions
    with pytest.raises(ValueError):
        mdl_slcs.encode(np.zeros((10, 3)))
    with pytest.raises(ValueError):
        mdl_slcs.encode(np.zeros(20))
    # 1-D input together with col_summary_func values 1 and None
    with pytest.raises(ValueError):
        mdl_slcs.encode(np.zeros(20), col_summary_func=1)
    with pytest.raises(ValueError):
        mdl_slcs.encode(np.zeros(20), col_summary_func=None)

    # Same model, same input: nprocs must not change the result.
    emdl = mdl_slcs.encode(np.arange(100).reshape(-1, 5))
    emdl2 = mdl_slcs.encode(np.arange(100).reshape(-1, 5), nprocs=2)
    np.testing.assert_approx_equal(emdl, emdl2)

    # Explicit GKdeMdl on the same training data.
    emdl3 = eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, mdl_method=eda.mdl.GKdeMdl, labs=self.labs50,
        metric="euclidean").encode(np.arange(100).reshape(-1, 5))
    assert emdl != emdl3

    # GKdeMdl trained on all-zero data.
    emdl4 = eda.MDLSingleLabelClassifiedSamples(
        np.zeros((50, 5)), mdl_method=eda.mdl.GKdeMdl, labs=self.labs50,
        metric="euclidean").encode(np.arange(100).reshape(-1, 5))
    assert emdl != emdl4

    # Same all-zero model, encoding with non_zero_only=True.
    emdl5 = eda.MDLSingleLabelClassifiedSamples(
        np.zeros((50, 5)), mdl_method=eda.mdl.GKdeMdl, labs=self.labs50,
        metric="euclidean").encode(np.arange(100).reshape(-1, 5),
                                   non_zero_only=True)
    assert emdl5 != emdl3

    # Distance-encoded model; the encoded input has 50 columns.
    emdl6 = eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, encode_type="distance", mdl_method=eda.mdl.GKdeMdl,
        labs=self.labs50, metric="euclidean").encode(
            np.arange(100).reshape(-1, 50), non_zero_only=True)
    # NOTE(review): the original repeated `emdl5 != emdl3` here, which left
    # emdl6 unchecked.  Comparing against emdl3 (same training data and
    # mdl_method, default encode_type) is the presumed intent - confirm.
    assert emdl6 != emdl3
def test_mdl_computation(self):
    """Check the ``lab_mdl`` result of a default model on the 50x5 fixture.

    The result must list the expected per-label sample indices, its total
    must equal the sum of the per-label MDLs, and each per-label MDL minus
    ``cluster_mdl * label_size / 50`` must match the ``no_lab_mdl`` of a
    model built from that label's rows alone.
    """
    full_slcs = eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, labs=self.labs50, metric="euclidean")
    # Run for coverage; the value itself is not asserted on.
    full_slcs.no_lab_mdl()

    # The returned count list is ignored in favour of hard-coded sizes.
    total_mdl, members, _, per_lab_mdls, cluster_mdl = full_slcs.lab_mdl()
    assert members == [list(range(10)), list(range(10, 45)),
                       list(range(45, 50))]
    assert total_mdl == np.sum(per_lab_mdls)

    expected_sizes = [10, 35, 5]
    for i, inds in enumerate(members):
        sub_slcs = eda.MDLSingleLabelClassifiedSamples(
            self.x50x5[inds, :],
            labs=[self.labs50[j] for j in inds],
            metric="euclidean")
        np.testing.assert_allclose(
            sub_slcs.no_lab_mdl(),
            per_lab_mdls[i] - cluster_mdl * expected_sizes[i] / 50)
def test_wrong_mdl_method(self):
    """Invalid ``mdl_method`` values must lead to a ValueError.

    For each of ``"123"``, ``int``, ``"ZeroIMdl"`` and ``2``, the error has
    to be raised either by the constructor or by the following
    ``no_lab_mdl()`` call.
    """
    for bad_mdl_method in ("123", int, "ZeroIMdl", 2):
        with pytest.raises(ValueError):
            eda.MDLSingleLabelClassifiedSamples(
                self.x50x5, labs=self.labs50,
                mdl_method=bad_mdl_method,
                metric="euclidean").no_lab_mdl()
def test_lab_mdl_ret_internal(self):
    """Check ``lab_mdl(ret_internal=True)`` against the plain ``lab_mdl()``.

    With ``ret_internal=True`` the call yields a pair: the usual five-field
    result and an extra list of MDLs.  The extra MDLs plus ``cluster_mdl``
    must sum to the same value as the per-label MDLs.  A second, plain call
    must report the same total, and that total must equal the sum of its
    ``ulab_mdls`` field.
    """
    slcs = eda.MDLSingleLabelClassifiedSamples(
        self.x50x5, labs=self.labs50, metric="euclidean")

    summary, internal_mdls = slcs.lab_mdl(ret_internal=True)
    # Only the total, the per-label MDLs and cluster_mdl are used below.
    total_mdl, _, _, per_lab_mdls, cluster_mdl = summary
    np.testing.assert_allclose(sum(internal_mdls) + cluster_mdl,
                               sum(per_lab_mdls))

    plain_res = slcs.lab_mdl()
    plain_total = plain_res.ulab_mdl_sum
    assert plain_total == total_mdl
    assert plain_total == np.sum(plain_res.ulab_mdls)