def test_pass_if_consistent_on_similar_random_data(self):
    """Check that TICC predictions agree across similar random datasets.

    For every scenario, cluster parameters are generated once and then
    several datasets are produced from the same ``RandomData`` instance.
    A single ``TICC`` estimator runs ``fit_predict`` on each dataset, and
    for every pair of predictions the number of differing labels divided
    by ``n_samples`` must stay below 0.02.
    """
    # Each scenario: (n_features, labels passed to generate_points,
    # number of datasets to generate).
    scenarios = [
        (5, [0, 1, 0], 5),
        (5, [0, 1, 2, 1, 0], 5),
    ]
    for n_features, segment_labels, n_datasets in scenarios:
        generator = RandomData(seed=0, n_features=n_features, window_size=5)
        n_segments = len(segment_labels)
        n_clusters = len(set(segment_labels))
        n_samples = 200 * n_clusters * n_segments
        # Evenly spaced break values; the last one equals n_samples.
        break_points = [
            idx * n_samples // n_segments for idx in range(1, n_segments + 1)
        ]

        # Cluster parameters are generated once, before any dataset, so
        # that every dataset reuses them.
        generator.generate_cluster_params(n_clusters)
        datasets = []
        for _ in range(n_datasets):
            generated = generator.generate_points(
                segment_labels, break_points, True
            )
            # Only the first returned item is used as model input.
            datasets.append(generated[0])

        model = TICC(
            n_clusters=n_clusters,
            window_size=5,
            beta=300,
            n_jobs=4,
            random_state=0,
            cluster_reassignment=0.3,
            verbose=True,
        )
        predictions = []
        for X in datasets:
            predictions.append(model.fit_predict(X))

        # Compare every unordered pair of predictions.
        for first, second in combinations(predictions, 2):
            mismatch_rate = np.sum(np.not_equal(first, second)) / n_samples
            assert mismatch_rate < 0.02
def test_recycling_clusters_between_calls(self):
    """Check that ``clusters`` compares equal across ``generate_points`` calls.

    For each scenario a ``RandomData`` instance is created from the seed,
    cluster parameters are generated once, and ``generate_points`` is
    called twice with identical arguments. The ``clusters`` attribute read
    after each call must compare equal.
    """
    # Each scenario: (labels, breaks, seed). The lists are kept as separate
    # literals per scenario rather than shared objects.
    scenarios = [
        ([0, 1, 0], [20, 40, 60], 0),
        ([0, 1, 0], [20, 40, 60], 10),
        ([0, 1, 0], [20, 40, 60], 100),
    ]
    for segment_labels, break_points, seed in scenarios:
        generator = RandomData(seed)
        n_clusters = len(set(segment_labels))
        generator.generate_cluster_params(n_clusters)

        # The two-element unpacking is kept on purpose: it fails if
        # generate_points stops returning exactly two items.
        _first_X, _first_y = generator.generate_points(
            segment_labels, break_points, True
        )
        clusters_after_first = generator.clusters

        _second_X, _second_y = generator.generate_points(
            segment_labels, break_points, True
        )
        clusters_after_second = generator.clusters

        # NOTE(review): if `clusters` is the same object on both reads, this
        # equality holds trivially - confirm against RandomData.
        assert clusters_after_first == clusters_after_second