def test_kmeans_modelweights(self):
    """Check that zero-weighted models are not selected from shared clusters."""
    dataset = xr.open_dataset(self.nc_file)
    n_models = dataset.data.shape[0]

    # Each case zeroes the weights of models that the plain n_clusters test
    # selects, so those models should no longer be selected here.
    cases = (
        (4, [4, 7, 10, 23]),
        (9, [0, 3, 4, 6, 7, 10, 11, 12, 13]),
    )
    for n_clusters, zeroed in cases:
        weights = np.ones(n_models)
        weights[zeroed] = 0
        selected, labels, _fig = ensembles.kmeans_reduce_ensemble(
            data=dataset.data,
            method={"n_clusters": n_clusters},
            random_state=42,
            make_graph=False,
            model_weights=weights,
        )
        for idx in np.flatnonzero(weights == 0):
            # A zero-weight model may only show up when it is the sole
            # member of its cluster.
            if np.sum(labels == labels[idx]) <= 1:
                continue
            assert idx not in selected
def test_kmeans_variweights(self):
    """Exercise ``variable_weights`` with the rsq_cutoff and rsq_optimize methods."""
    pytest.importorskip("sklearn", minversion="0.24.1")
    dataset = open_dataset(self.nc_file)
    fixed = {"random_state": 42, "make_graph": False}

    # Variable weights: keep the first three at 1, reduce the rest.
    reduced = np.ones(dataset.data.shape[1])
    reduced[3:] = 0.25
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_cutoff": 0.9},
        variable_weights=reduced,
        **fixed,
    )
    assert selected == [1, 3, 8, 10, 13, 14, 16, 19, 20]
    assert len(selected) == 9

    # RSQ optimize, this time with some variables weighted to zero.
    zeroed = np.ones(dataset.data.shape[1])
    zeroed[[1, 4]] = 0
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_optimize": None},
        variable_weights=zeroed,
        **fixed,
    )
    # Results here may change according to sklearn version, hence the
    # *isin* instead of ==.
    assert np.isin([12, 13, 16], selected).all()
    assert len(selected) == 6
def test_kmeans_sampleweights(self):
    """Exercise ``sample_weights`` with boosted and with zeroed simulations."""
    dataset = open_dataset(self.nc_file)

    # Boost the weights of a couple of simulations.
    boosted = np.ones(dataset.data.shape[0])
    boosted[[0, 20]] = 15
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_cutoff": 0.5},
        random_state=42,
        make_graph=False,
        sample_weights=boosted,
    )
    assert selected == [0, 20, 23]
    assert len(selected) == 3

    # RSQ optimize with some simulations weighted to zero.
    zeroed = np.ones(dataset.data.shape[0])
    zeroed[[6, 18, 22]] = 0
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_optimize": None},
        random_state=0,
        make_graph=False,
        sample_weights=zeroed,
    )
    assert selected == [4, 5, 7, 10, 11, 12, 13]
    assert len(selected) == 7
def test_kmeans_nclust(self):
    """Check the selection returned for fixed cluster counts of 4 and 9."""
    dataset = xr.open_dataset(self.nc_file)
    fixed = {"random_state": 42, "make_graph": False}

    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data, method={"n_clusters": 4}, **fixed
    )
    assert selected == [4, 7, 10, 23]
    assert len(selected) == 4

    # Second request passes the data positionally rather than by keyword.
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        dataset.data, method={"n_clusters": 9}, **fixed
    )
    assert selected == [0, 3, 4, 6, 7, 10, 11, 12, 13]
    assert len(selected) == 9
def test_kmeans_variweights(self):
    """Exercise ``variable_weights`` with reduced and with zeroed variables."""
    pytest.importorskip("sklearn", minversion="0.22")
    dataset = xr.open_dataset(self.nc_file)
    fixed = {"random_state": 42, "make_graph": False}

    # Variable weights: keep the first three at 1, reduce the rest.
    reduced = np.ones(dataset.data.shape[1])
    reduced[3:] = 0.25
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_cutoff": 0.9},
        variable_weights=reduced,
        **fixed,
    )
    assert selected == [1, 3, 8, 10, 13, 14, 16, 19, 20]
    assert len(selected) == 9

    # Same reduced weights, now with RSQ optimize.
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_optimize": None},
        variable_weights=reduced,
        **fixed,
    )
    assert selected == [2, 4, 8, 13, 14, 22]
    assert len(selected) == 6

    # RSQ optimize again, with some variables weighted to zero.
    zeroed = np.ones(dataset.data.shape[1])
    zeroed[[1, 4]] = 0
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_optimize": None},
        variable_weights=zeroed,
        **fixed,
    )
    assert selected == [4, 10, 12, 13, 16]
    assert len(selected) == 5
def test_kmeans_rsqopt(self):
    """Check the selection returned by the ``rsq_optimize`` method."""
    pytest.importorskip("sklearn", minversion="0.24.1")
    dataset = open_dataset(self.nc_file)

    # A fixed random state keeps the clustering reproducible.
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_optimize": None},
        random_state=42,
        make_graph=False,
    )
    assert selected == [3, 4, 5, 7, 10, 11, 12, 13]
    assert len(selected) == 8
def test_kmeans_rsqcutoff_with_graphs(self):
    """Check ``rsq_cutoff`` selections with graphs on, with and without ``max_clusters``."""
    pytest.importorskip("sklearn", minversion="0.22")
    dataset = xr.open_dataset(self.nc_file)
    # A fixed random state keeps the clustering consistent between runs.
    fixed = {"random_state": 42, "make_graph": True}

    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data, method={"rsq_cutoff": 0.9}, **fixed
    )
    assert selected == [0, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 20, 22]
    assert len(selected) == 14

    # Same request with the max cluster option set.
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_cutoff": 0.9},
        max_clusters=10,
        **fixed,
    )
    assert selected == [0, 1, 3, 4, 6, 7, 10, 11, 18, 20]
    assert len(selected) == 10
def test_kmeans_rsqcutoff_with_graphs(self):
    """Check ``rsq_cutoff`` selections with graphs on, with and without ``max_clusters``."""
    pytest.importorskip("sklearn", minversion="0.24.1")
    dataset = open_dataset(self.nc_file)
    # A fixed random state keeps the clustering consistent between runs.
    fixed = {"random_state": 42, "make_graph": True}

    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data, method={"rsq_cutoff": 0.5}, **fixed
    )
    assert selected == [4, 7, 10, 23]
    assert len(selected) == 4

    # Same request with the max cluster option set.
    selected, _labels, _fig = ensembles.kmeans_reduce_ensemble(
        data=dataset.data,
        method={"rsq_cutoff": 0.5},
        max_clusters=3,
        **fixed,
    )
    assert selected == [4, 7, 23]
    assert len(selected) == 3