def test_hartigans_rule(self, model_national):
    """Check that k-means clustering without ``k`` warns with a suggested count.

    ``apply_clustering`` is called with ``clustering_func="kmeans"`` and no
    ``k`` argument; the call must emit a ``ModelWarning`` whose text matches
    "a good number of clusters is 5".
    """
    # NOTE(review): the test name suggests the count comes from Hartigan's
    # rule; that logic is not visible here -- confirm in funcs.
    model_data = model_national._model_data
    clustering_kwargs = {
        "timesteps": None,
        "clustering_func": "kmeans",
        "how": "mean",
        "normalize": True,
    }

    with pytest.warns(exceptions.ModelWarning) as caught:
        funcs.apply_clustering(model_data, **clustering_kwargs)

    expected_message = "a good number of clusters is 5"
    assert check_error_or_warning(caught, expected_message)
def test_hierarchical_no_hartigans_rule(self, model_national):
    """Check that hierarchical clustering without ``k`` is rejected.

    ``apply_clustering`` is called with ``clustering_func="hierarchical"``
    and no ``k`` argument; the call must raise a ``ModelError`` whose text
    matches "Cannot undertake hierarchical clustering".
    """
    model_data = model_national._model_data
    clustering_kwargs = {
        "timesteps": None,
        "clustering_func": "hierarchical",
        "how": "mean",
        "normalize": True,
    }

    with pytest.raises(exceptions.ModelError) as raised:
        funcs.apply_clustering(model_data, **clustering_kwargs)

    expected_message = "Cannot undertake hierarchical clustering"
    assert check_error_or_warning(raised, expected_message)
def test_15min_clustering(self):
    """Cluster a model fed by ``demand_elec_15mins.csv`` into two clusters.

    The model is clustered with ``k=2`` using both the k-means and the
    hierarchical method. Both results must contain exactly two distinct
    cluster labels. The k-means result must additionally keep '2005-01-02'
    plus one of the two identical days, and report a timestep resolution of
    0.25 for each of its ``24 * 4 * 2`` timesteps.
    """
    # The data is identical for '2005-01-01' and '2005-01-03' timesteps,
    # it is only different for '2005-01-02'
    model = build_test_model(
        {
            "techs.test_demand_elec.constraints.resource": "file=demand_elec_15mins.csv",
            "model.subset_time": None,
        },
        scenario="simple_supply,one_day",
    )
    model_data = model._model_data

    # Everything except the clustering algorithm is shared by both runs.
    shared_kwargs = {
        "timesteps": None,
        "how": "mean",
        "normalize": True,
        "k": 2,
    }
    clustered_kmeans = funcs.apply_clustering(
        model_data, clustering_func="kmeans", **shared_kwargs
    )
    clustered_hierarchical = funcs.apply_clustering(
        model_data, clustering_func="hierarchical", **shared_kwargs
    )

    for clustered in (clustered_kmeans, clustered_hierarchical):
        cluster_labels = clustered.clusters.to_pandas().unique()
        assert len(cluster_labels) == 2

    kept_days = np.unique(
        clustered_kmeans.timesteps.to_index().strftime("%Y-%m-%d")
    )
    # not sure which of '2005-01-01' and '2005-01-03' it will choose to
    # label the cluster of those two days
    assert "2005-01-02" in kept_days
    assert "2005-01-01" in kept_days or "2005-01-03" in kept_days

    # One 0.25 entry per timestep: 24 * 4 per day, for the two retained days.
    expected_resolution = [0.25] * (24 * 4 * 2)
    assert np.array_equal(
        clustered_kmeans.timestep_resolution.values, expected_resolution
    )
def test_hierarchical_closest(self, model_national):
    """Run hierarchical clustering with ``how="closest"`` and ``k=5``.

    This is a smoke test: it only verifies that ``apply_clustering``
    completes without raising for this combination of arguments.
    """
    # NOTE(review): unlike the how="mean" variant, nothing is asserted about
    # the result -- confirm what the clustered data should look like and add
    # a check.
    clustering_kwargs = {
        "timesteps": None,
        "clustering_func": "hierarchical",
        "how": "closest",
        "normalize": True,
        "k": 5,
    }
    # The result is intentionally discarded.
    _ = funcs.apply_clustering(model_national._model_data, **clustering_kwargs)
def test_hierarchical_mean(self, model_national):
    """Check that hierarchical clustering with ``how="mean"`` yields ``k`` clusters.

    The national model's data is clustered with ``k=5``; the ``clusters``
    variable of the result must contain exactly five distinct labels.
    """
    n_clusters = 5
    clustered = funcs.apply_clustering(
        model_national._model_data,
        timesteps=None,
        clustering_func="hierarchical",
        how="mean",
        normalize=True,
        k=n_clusters,
    )

    cluster_labels = clustered.clusters.to_pandas().unique()
    assert len(cluster_labels) == n_clusters