Example #1
    def test_matches_original_paper_macro_F1(self):
        test_cases = [
            # n_features, label order, macro F1 to beat
            (5, [0, 1, 0], 0.9),
            (5, [0, 1, 2, 1, 0], 0.9),
            (5, [0, 1, 2, 3, 0, 1, 2, 3], 0.9),
            (5, [0, 1, 1, 0, 2, 2, 2, 0], 0.9),
        ]
        for n, labels, expected in test_cases:
            rdata = RandomData(0, n, window_size=5)
            # Original paper code performs at 100 points/cluster!
            samples_per_segment = 120
            k = len(set(labels))  # Num clusters
            t = samples_per_segment * k * len(labels)  # total time-series length
            breaks = [i * t // len(labels) for i, _ in enumerate(labels, 1)]
            X, y_tru = rdata.generate_points(labels, breaks)
            ticc = TICC(n_clusters=k, window_size=5, n_jobs=4, random_state=0)
            y = ticc.fit_predict(X)
            # We use best_f1 because label:segment assignments are arbitrary
            result = best_f1(y_tru, y, average='macro')
            assert result > expected
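The best_f1 helper used above is not part of this listing. A minimal sketch of what such a helper might look like, assuming it simply relabels the predicted clusters over every permutation and keeps the highest macro F1 (scikit-learn's f1_score does the per-permutation scoring; this is only practical for small cluster counts, since the search is over k! permutations):

from itertools import permutations

from sklearn.metrics import f1_score


def best_f1(y_true, y_pred, average='macro'):
    """Best F1 over all relabelings of the predicted clusters.

    Cluster ids returned by a clustering model are arbitrary, so every
    permutation of the predicted labels is scored and the maximum kept.
    """
    labels = sorted(set(y_pred))
    best = 0.0
    for perm in permutations(labels):
        mapping = dict(zip(labels, perm))
        relabeled = [mapping[label] for label in y_pred]
        best = max(best, f1_score(y_true, relabeled, average=average))
    return best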
Example #2
# %% Generate data
n_features = 5
label_seq = [0, 1, 2, 0, 2, 1]
samples_per_segment = 250
window_size = 8

# Derived from above params
k = len(set(label_seq))  # Num clusters
t = samples_per_segment * len(label_seq)  # total time-series length
breaks = [i * t // len(label_seq) for i in range(1, len(label_seq) + 1)]
palette = {n: c['color'] for n, c in zip(range(n_features), colors)}
randomdata = RandomData(seed=1234,
                        n_features=n_features,
                        window_size=window_size)
X, y_true = randomdata.generate_points(label_seq, breaks)

# Plot Synthetic Data
plot_synthetic_data(X, breaks)

# %% Fit TICC and GMM to data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
ticc = TICC(n_clusters=k, window_size=window_size, random_state=1234, beta=200)
gmm = GaussianMixture(n_components=k, random_state=1234)
X_stacked = ticc.stack_data(X_scaled)

y_ticc = ticc.fit_predict(X)
y_gmm = gmm.fit_predict(X_stacked)

# Macro F1 Scores
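The listing ends at the comment above. One way the comparison could be completed, assuming the best_f1 helper sketched under Example #1 and allowing for the possibility that window stacking shortens the GMM label vector relative to y_true (hence the tail alignment); the aligned_macro_f1 name is purely illustrative:

def aligned_macro_f1(y_true, y_pred):
    # Window stacking can drop the first window_size - 1 samples, so score
    # against the tail of the ground truth that lines up with the predictions.
    n = min(len(y_true), len(y_pred))
    return best_f1(y_true[-n:], y_pred[-n:], average='macro')

print('TICC macro F1:', aligned_macro_f1(y_true, y_ticc))
print('GMM macro F1: ', aligned_macro_f1(y_true, y_gmm))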