def test_DTWKmeans_inertia_positive(self):
    """Check that a fitted DTWKmeans model reports a strictly positive inertia."""
    # Noisy flat dataset built around three levels.
    series = make_flat_dataset(
        [-1.0, 0, 0.5],
        10,
        additive_noise_factor=0.3,
        level_noise_factor=0.3,
        lengths=[5],
    )

    model = DTWKmeans(num_clust=3, num_iter=1, random_seed=101)
    model.fit(series)

    assert model._inertia(series) > 0
def test_random_seed(self):
    """Check that two datasets generated with the same seed are equal."""
    levels = [-1.0, 0, 0.5]
    sizes = [10, 5, 20]
    # Keyword arguments shared by both generator calls, including the seed.
    generator_kwargs = {
        "additive_noise_factor": 0.3,
        "level_noise_factor": 0.3,
        "lengths": [5],
        "random_seed": 101,
    }

    first_dataset = make_flat_dataset(levels, sizes, **generator_kwargs)
    second_dataset = make_flat_dataset(levels, sizes, **generator_kwargs)

    assert lists_of_series_are_equal(first_dataset, second_dataset)
def test_flat_unbalanced_clusters(self):
    """Check the dataset size when each level is given its own element count."""
    levels = [-1.0, 0, 0.5]
    sizes = [10, 5, 20]
    expected_total = sum(sizes)

    dataset = make_flat_dataset(
        levels,
        sizes,
        additive_noise_factor=0.3,
        level_noise_factor=0.3,
        lengths=[5],
    )

    # The generator must return one series per requested element.
    assert len(dataset) == expected_total
def test_flat_equal_clusters(self):
    """Check the dataset size when a single scalar count is given for all levels."""
    levels = [-1.0, 0, 0.5]
    per_level_count = 10
    expected_total = per_level_count * len(levels)

    # Both noise factors are zero for this dataset.
    dataset = make_flat_dataset(
        levels,
        per_level_count,
        additive_noise_factor=0.0,
        level_noise_factor=0.0,
        lengths=[5],
    )

    assert len(dataset) == expected_total
def flat_dataset(random_seed=101):
    """Build a flat dataset around three levels with unequal cluster sizes.

    Args:
        random_seed: Seed forwarded to ``make_flat_dataset`` so repeated calls
            can be reproduced. Defaults to 101.

    Returns:
        Whatever ``make_flat_dataset`` returns for these settings.
    """
    # Three levels, each with a different number of elements.
    cluster_levels = [1.5, 0, -1.5]
    cluster_sizes = [15, 30, 10]

    return make_flat_dataset(
        cluster_levels,
        cluster_sizes,
        additive_noise_factor=0.4,
        level_noise_factor=0.4,
        lengths=[10],
        random_seed=random_seed,
    )
def test_DTWKmeans_inertia_decrease_with_iteration_increase(self):
    """Check that fitting with two iterations does not increase the inertia.

    Two DTWKmeans models are built with the same number of clusters and the
    same random seed; they differ only in ``num_iter`` (1 versus 2). Both are
    fitted on the same dataset and their inertias are compared.
    """
    # NOTE(review): the dataset itself is generated without a random seed, so
    # the input differs between runs; pass ``random_seed`` to
    # ``make_flat_dataset`` if a fully reproducible test is wanted.
    list_of_series = make_flat_dataset(
        [-1.0, 0, 0.5],
        10,
        additive_noise_factor=0.3,
        level_noise_factor=0.3,
        lengths=[5],
    )
    num_clusters = 3
    random_seed = 101

    clts_1 = DTWKmeans(num_clust=num_clusters, num_iter=1, random_seed=random_seed)
    clts_1.fit(list_of_series)
    clts_2 = DTWKmeans(num_clust=num_clusters, num_iter=2, random_seed=random_seed)
    clts_2.fit(list_of_series)

    # Compute each inertia once and reuse it for the check and the message.
    inertia_one_iteration = clts_1._inertia(list_of_series)
    inertia_two_iterations = clts_2._inertia(list_of_series)

    # The failure message carries both values, replacing the debug prints.
    assert inertia_one_iteration >= inertia_two_iterations, (
        "inertia increased with more iterations: "
        f"{inertia_one_iteration} (1 iteration) < "
        f"{inertia_two_iterations} (2 iterations)"
    )
def test_DTWKmeans_fit_is_reproduceable_using_random_seed(self):
    """Check that two fits with the same random seed give equal cluster centers."""
    series = make_flat_dataset(
        [-1.0, 0, 1.0],
        10,
        additive_noise_factor=0.1,
        level_noise_factor=0.1,
        lengths=[5],
    )
    model_kwargs = {"num_clust": 3, "num_iter": 1, "random_seed": 101}

    # Build and fit two identically configured models, one after the other,
    # collecting each set of cluster centers as a plain array.
    center_arrays = []
    for _ in range(2):
        model = DTWKmeans(**model_kwargs)
        model.fit(series)
        center_arrays.append(pd.DataFrame(model.cluster_centers_).values)

    first_centers, second_centers = center_arrays
    assert np.all(first_centers == second_centers)