コード例 #1
0
ファイル: test_ticc.py プロジェクト: grddavies/TICC
 def test_pass_if_consistent_on_similar_random_data(self):
     """Check that repeated fits on similarly generated data agree.

     For each scenario, several datasets are drawn from one ``RandomData``
     generator after a single ``generate_cluster_params`` call. One ``TICC``
     estimator is then run on every dataset, and each pair of predicted
     label vectors must differ on fewer than 2% of the ``t`` time steps.
     """
     # (n_features, segment label order, number of datasets to draw)
     scenarios = (
         (5, [0, 1, 0], 5),
         (5, [0, 1, 2, 1, 0], 5),
     )
     for n_features, segment_labels, n_datasets in scenarios:
         generator = RandomData(seed=0,
                                n_features=n_features,
                                window_size=5)
         n_segments = len(segment_labels)
         k = len(set(segment_labels))
         t = 200 * k * n_segments
         # Evenly spaced break points; the last one equals t.
         breaks = [idx * t // n_segments for idx in range(1, n_segments + 1)]
         # Cluster parameters are generated once and reused for each dataset.
         generator.generate_cluster_params(k)
         datasets = []
         for _ in range(n_datasets):
             generated = generator.generate_points(segment_labels, breaks,
                                                   True)
             datasets.append(generated[0])
         model = TICC(n_clusters=k,
                      window_size=5,
                      beta=300,
                      n_jobs=4,
                      random_state=0,
                      cluster_reassignment=0.3,
                      verbose=True)
         predictions = [model.fit_predict(X) for X in datasets]
         for first, second in combinations(predictions, 2):
             n_mismatched = np.sum(np.not_equal(first, second))
             mismatch_rate = n_mismatched / t
             assert mismatch_rate < 0.02
コード例 #2
0
ファイル: IntegrationTest.py プロジェクト: grddavies/TICC
    def test_example(self):
        """Fit TICC on the example data and compare against stored results.

        Checks, in order:

        1. ``fit_predict`` labels equal the labels stored in ``Results.txt``.
        2. ``predict`` on the first 999 rows reproduces the fitted labels.
        3. Streaming prediction over 5-row blocks agrees with the fitted
           labels on the first 1000 rows.
        4. Each cluster's ``MRF_`` matches ``cluster_{i}.txt`` to 3 decimals.

        All fixtures are read from ``test_data/`` relative to the current
        working directory, so no machine-specific absolute path is needed.
        """
        X = np.loadtxt("test_data/example_data.txt", delimiter=",")
        ticc = TICC(n_clusters=8,
                    window_size=1,
                    lambda_parameter=11e-2,
                    beta=600,
                    max_iter=100,
                    n_jobs=4,
                    random_state=102,
                    verbose=True)
        cluster_assignment = ticc.fit_predict(X)
        clusters = ticc.clusters_
        # To regenerate the fixture:
        # np.savetxt("UnitTest_Data/Results.txt", cluster_assignment, fmt='%d', delimiter=",")
        assign = np.loadtxt("test_data/Results.txt")
        val = abs(assign - cluster_assignment)
        self.assertEqual(sum(val), 0)

        # Test prediction works with batch of data outside of `fit` method.
        # Perhaps there is a better way to test this in parallel so these are
        # more like unit tests rather than integration tests?
        batch_labels = ticc.predict(X[0:999, ])
        # To regenerate the fixture:
        # np.savetxt("UnitTest_Data/batchLabels.txt", batch_labels, fmt="%d", delimiter=',')
        batch_val = abs(batch_labels - cluster_assignment[:999])
        self.assertEqual(sum(batch_val), 0)

        # Test streaming by passing in 5 row blocks at a time (current
        # timestamp and previous 4). This causes data leakage by training on
        # the whole set and then using the trained model while streaming,
        # but this is for testing the code, so it is ok.
        # TODO: figure out why larger blocks don't improve predictions more.
        # Reference:
        # https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116

        def test_streaming(block_size):
            """Predict rows one at a time from the preceding block of rows."""
            test_stream = np.zeros(1000)
            # The first block has no full history, so seed it from the fit.
            test_stream[0:block_size] = cluster_assignment[0:block_size]
            for i in range(block_size, 1000):
                point = X[i - block_size:i, ]
                # Keep only the label of the last row in the block.
                test_stream[i] = ticc.predict(point)[block_size - 1]

            percent_correct_streaming = 100 * sum(
                cluster_assignment[:1000] == test_stream) / 1000.0
            # NOTE(review): the value above is on a 0-100 scale, so 0.9 means
            # "more than 0.9% correct" -- confirm whether 90 was intended.
            self.assertGreater(percent_correct_streaming, 0.9)

        test_streaming(5)

        for i in range(8):
            # To regenerate the fixture:
            # np.savetxt(f"UnitTest_Data/cluster_{i}.txt", clusterMRFs[i],  fmt='%.4e', delimiter=',')
            MRF = np.loadtxt(f"test_data/cluster_{i}.txt", delimiter=',')
            np.testing.assert_array_almost_equal(MRF,
                                                 clusters[i].MRF_,
                                                 decimal=3)
コード例 #3
0
ファイル: test_ticc.py プロジェクト: grddavies/TICC
 def test_fit_predict(self):
     """``fit(X).predict(X)`` and ``fit_predict(X)`` must give equal labels."""
     generator = RandomData(0, 5, 5)
     segment_labels = [0, 1, 2, 3, 4, 5]
     n_segments = len(segment_labels)
     # Break points 200, 400, ..., 1200.
     segment_breaks = [
         idx * 1200 // n_segments for idx in range(1, n_segments + 1)
     ]
     X, _ = generator.generate_points(segment_labels, segment_breaks)
     settings = {
         'n_clusters': 6,
         'window_size': 5,
         'beta': 0,
         'n_jobs': 4,
         'verbose': True,
         'random_state': 0,
     }
     estimator = TICC(**settings)
     labels_two_step = estimator.fit(X).predict(X)
     labels_one_step = estimator.fit_predict(X)
     np.testing.assert_array_equal(labels_two_step, labels_one_step)
コード例 #4
0
ファイル: test_ticc.py プロジェクト: grddavies/TICC
 def test_matches_original_paper_macro_F1(self):
     """Macro F1 of TICC labels on generated data must beat a per-case floor."""
     # Original paper code performs at 100p/cluster!
     samples_per_segment = 120
     # (n_features, segment label order, macro F1 to beat)
     scenarios = (
         (5, [0, 1, 0], 0.9),
         (5, [0, 1, 2, 1, 0], 0.9),
         (5, [0, 1, 2, 3, 0, 1, 2, 3], 0.9),
         (5, [0, 1, 1, 0, 2, 2, 2, 0], 0.9),
     )
     for n_features, segment_labels, f1_floor in scenarios:
         generator = RandomData(0, n_features, window_size=5)
         n_segments = len(segment_labels)
         k = len(set(segment_labels))  # number of distinct clusters
         t = samples_per_segment * k * n_segments  # total series length
         breaks = [idx * t // n_segments for idx in range(1, n_segments + 1)]
         X, y_true = generator.generate_points(segment_labels, breaks)
         model = TICC(n_clusters=k,
                      window_size=5,
                      n_jobs=4,
                      random_state=0)
         y_pred = model.fit_predict(X)
         # best_f1 is used because label:segment assignments are arbitrary.
         macro_f1 = best_f1(y_true, y_pred, average='macro')
         assert macro_f1 > f1_floor
コード例 #5
0
ファイル: test_ticc.py プロジェクト: grddavies/TICC
 def test_label_consistency_w_different_seeds(self):
     """Two TICC models with different ``random_state`` must label X equally."""
     # (seed of first model, seed of second model, n_features, label order)
     scenarios = (
         (0, 1, 5, [0, 1, 2]),
         (3, 2, 5, [0, 1, 2, 1, 0]),
         (0, 1, 5, [0, 1, 2, 3, 0, 1, 2, 3]),
         (0, 9, 5, [0, 1, 1, 0, 2, 2, 2, 0]),
     )
     for seed_a, seed_b, n_features, segment_labels in scenarios:
         generator = RandomData(0, n_features, window_size=5)
         n_segments = len(segment_labels)
         k = len(set(segment_labels))  # number of distinct clusters
         t = 200 * k * n_segments  # total series length
         breaks = [idx * t // n_segments for idx in range(1, n_segments + 1)]
         X, _ = generator.generate_points(segment_labels, breaks)
         # The two models differ only in their random_state.
         model_a = TICC(n_clusters=k,
                        window_size=5,
                        n_jobs=4,
                        random_state=seed_a)
         model_b = TICC(n_clusters=k,
                        window_size=5,
                        n_jobs=4,
                        random_state=seed_b)
         labels_a = model_a.fit_predict(X)
         labels_b = model_b.fit_predict(X)
         np.testing.assert_array_equal(labels_a, labels_b)
コード例 #6
0
ファイル: IntegrationTest.py プロジェクト: grddavies/TICC
    def test_multiExample(self):
        """Fit a 5-cluster, window-5 TICC and compare against stored results.

        The fitted labels must equal those stored in ``multiResults.txt`` and
        each cluster's ``MRF_`` must match ``multiCluster_{i}.txt`` to 3
        decimals. Fixtures are read from ``test_data/`` relative to the
        current working directory.
        """
        n_clusters = 5
        X = np.loadtxt("test_data/example_data.txt", delimiter=",")
        model = TICC(n_clusters=n_clusters,
                     window_size=5,
                     lambda_parameter=11e-2,
                     beta=600,
                     max_iter=100,
                     n_jobs=4,
                     random_state=102,
                     verbose=True)
        predicted_labels = model.fit_predict(X)
        fitted_clusters = model.clusters_
        # To regenerate the fixture:
        # np.savetxt("UnitTest_Data/multiResults.txt", cluster_assignment, fmt='%d', delimiter=',')
        stored_labels = np.loadtxt("test_data/multiResults.txt")
        label_diff = abs(stored_labels - predicted_labels)
        self.assertEqual(sum(label_diff), 0)

        for i in range(n_clusters):
            # To regenerate the fixture:
            # np.savetxt(f"UnitTest_Data/multiCluster_{i}.txt", clusterMRFs[i], fmt='%.4e', delimiter=",")
            stored_mrf = np.loadtxt(f"test_data/multiCluster_{i}.txt",
                                    delimiter=',')
            np.testing.assert_array_almost_equal(stored_mrf,
                                                 fitted_clusters[i].MRF_,
                                                 decimal=3)
コード例 #7
0
ファイル: test_ticc.py プロジェクト: grddavies/TICC
 def test_score_increases_with_f1(self):
     """The grid point with the best ``score`` must also have the best F1.

     Models are fitted in parallel over a grid of ``beta`` and
     ``window_size`` values; the index of the highest ``score(X)`` must
     equal the index of the highest ``best_f1`` against the true labels.
     """
     samples_per_segment = 300
     labels = [0, 1, 2, 3]
     n_segments = len(labels)
     k = len(set(labels))  # number of distinct clusters
     t = samples_per_segment * k * n_segments  # total series length
     breaks = [idx * t // n_segments for idx in range(1, n_segments + 1)]
     generator = RandomData(0, 5, 5)
     X, y_true = generator.generate_points(labels, breaks)
     grid = ParameterGrid({
         'beta': [0, 500, 1000],
         'window_size': [1, 5, 10],
     })
     unfitted = (TICC(**settings, random_state=0) for settings in grid)
     fitted = Parallel(n_jobs=-1)(delayed(lambda est: est.fit(X))(candidate)
                                  for candidate in unfitted)
     scores = [estimator.score(X) for estimator in fitted]
     f1_scores = [
         best_f1(y_true, estimator.predict(X)) for estimator in fitted
     ]
     best_by_score = scores.index(max(scores))
     best_by_f1 = f1_scores.index(max(f1_scores))
     assert best_by_score == best_by_f1
コード例 #8
0
ファイル: IntegrationTest.py プロジェクト: grddavies/TICC
 def test_empty_cluster_handling(self):
     """Smoke test: fitting on the empty-clusters fixture must not raise.

     There are no assertions; the test passes if ``fit`` completes on
     ``test_data/example_empty_clusters.npy``.
     """
     data = np.load('test_data/example_empty_clusters.npy')
     model = TICC(n_clusters=4, window_size=5, n_jobs=4, random_state=0)
     model.fit(data)
コード例 #9
0
ファイル: example.py プロジェクト: grddavies/TICC
# Derive problem dimensions from `label_seq` and `samples_per_segment`,
# both defined outside this excerpt.
k = len(set(label_seq))  # Num clusters
t = samples_per_segment * len(label_seq)  # total ts length
# Evenly spaced break points, one per entry of label_seq; the last equals t.
breaks = [i * t // len(label_seq) for i in range(1, len(label_seq) + 1)]
# Map indices 0..n_features-1 to the 'color' entry of each item in `colors`
# (`colors` is defined outside this excerpt).
palette = {n: c['color'] for n, c in zip(range(n_features), colors)}
randomdata = RandomData(seed=1234,
                        n_features=n_features,
                        window_size=window_size)
X, y_true = randomdata.generate_points(label_seq, breaks)

# Plot Synthetic Data
plot_synthetic_data(X, breaks)

# %% Fit TICC and GMM to data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
ticc = TICC(n_clusters=k, window_size=window_size, random_state=1234, beta=200)
gmm = GaussianMixture(n_components=k, random_state=1234)
# The GMM input is the scaled data passed through TICC's stack_data.
X_stacked = ticc.stack_data(X_scaled)

# NOTE(review): TICC is fitted on the unscaled X while the GMM input is built
# from X_scaled -- confirm this asymmetry is intended.
y_ticc = ticc.fit_predict(X)
y_gmm = gmm.fit_predict(X_stacked)

# Macro F1 Scores
# NOTE(review): if predicted cluster ids are arbitrary, comparing them directly
# with y_true may understate agreement -- confirm whether labels need aligning.
f1_ticc = f1_score(y_true, y_ticc, average='macro')
f1_gmm = f1_score(y_true, y_gmm, average='macro')
print(f"TICC F1 score = {f1_ticc}\n GMM F1 score = {f1_gmm}")

# %% Plot Cluster Assignments
# Three stacked axes sharing the x axis; this excerpt fills the first two.
fig, axes = plt.subplots(3, sharex=True, figsize=(14, 8))
axes[0].plot(y_true, color=palette[0], label='Ground Truth')
axes[1].plot(y_ticc, color=palette[1], label='TICC')
コード例 #10
0
ファイル: test_ticc.py プロジェクト: grddavies/TICC
def test_ticc_sklearn_compatibility():
    """Run ``check_estimator`` on a small TICC configuration.

    Returns whatever ``check_estimator`` returns.
    """
    # NOTE: window_size=1 ensures the input is unchanged by stacking.
    # A small max_iter and beta=0 keep the run short.
    estimator = TICC(
        n_clusters=3,
        window_size=1,
        max_iter=15,
        beta=0,
        n_jobs=-1,
    )
    return check_estimator(estimator)