def test_constrained_paths():
    """Check k-NN predictions and distances across several time-series metrics.

    The test asserts that:
    * DTW under a Sakoe-Chiba band of radius 0 predicts like the Euclidean
      metric,
    * soft-DTW with a very small ``gamma`` predicts like plain DTW,
    * the CTW metric runs end to end (smoke test only),
    * SAX neighbor distances stay below the Euclidean neighbor distances.
    """
    n_samples, length, dim = 15, 10, 3
    random_state = np.random.RandomState(0)
    # Draw order matters for reproducibility: features first, then labels.
    X = random_state.randn(n_samples, length, dim)
    y = random_state.randint(low=0, high=3, size=n_samples)

    def make_knn(metric, **extra):
        # Every classifier in this test uses 3 neighbors; only the metric
        # (and optionally its parameters) differs.
        return KNeighborsTimeSeriesClassifier(n_neighbors=3, metric=metric,
                                              **extra)

    # Euclidean vs. DTW restricted by a zero-radius Sakoe-Chiba band.
    knn_euclidean = make_knn("euclidean")
    pred_euclidean = knn_euclidean.fit(X, y).predict(X)

    sakoe_params = {
        "global_constraint": "sakoe_chiba",
        "sakoe_chiba_radius": 0
    }
    knn_sakoe = make_knn("dtw", metric_params=sakoe_params)
    pred_sakoe = knn_sakoe.fit(X, y).predict(X)
    np.testing.assert_equal(pred_euclidean, pred_sakoe)

    # Soft-DTW with a tiny gamma vs. unconstrained DTW.
    knn_softdtw = make_knn("softdtw", metric_params={"gamma": 1e-6})
    pred_softdtw = knn_softdtw.fit(X, y).predict(X)

    knn_dtw = make_knn("dtw")
    pred_dtw = knn_dtw.fit(X, y).predict(X)
    np.testing.assert_equal(pred_dtw, pred_softdtw)

    # CTW: only verifies that fitting and predicting do not fail.
    knn_ctw = make_knn("ctw")
    knn_ctw.fit(X, y).predict(X)

    # SAX: its MINDIST is a lower bound of the Euclidean distance.
    sax_params = {
        "alphabet_size_avg": 6,
        "n_segments": 10
    }
    knn_sax = make_knn("sax", metric_params=sax_params)
    knn_sax.fit(X, y)

    dist_euclidean, _ = knn_euclidean.kneighbors(X, n_neighbors=5)
    dist_sax, _ = knn_sax.kneighbors(X, n_neighbors=5)
    # The first column holds zeroes, so it is excluded from the strict
    # "less than" comparison.
    np.testing.assert_array_less(dist_sax[:, 1:], dist_euclidean[:, 1:])
class kNNClassifier:
    """k-NN time-series classifier with an optional two-stage prediction.

    ``fit`` always trains a Euclidean ``KNeighborsTimeSeriesClassifier``.
    When ``mac_neighbours`` is ``None``, ``predict`` uses that model directly.
    Otherwise, for each test sample, the Euclidean model selects
    ``mac_neighbours`` candidate training samples and a DTW-based k-NN fitted
    on those candidates only produces the prediction.

    Parameters
    ----------
    n_neighbours : int, default=5
        Number of neighbors used for voting (both stages).
    mac_neighbours : int or None, default=None
        Number of Euclidean candidates handed to the DTW stage. ``None``
        disables the DTW stage.
    weights : str, default="uniform"
        Forwarded as ``weights`` to ``KNeighborsTimeSeriesClassifier``.
    metric_params : dict or None, default=None
        Forwarded as ``metric_params`` to the DTW-stage classifier. ``None``
        is stored as a fresh empty dict.
    n_jobs : int, default=-1
        Forwarded as ``n_jobs`` to ``KNeighborsTimeSeriesClassifier``.
    """

    def __init__(self, n_neighbours=5, mac_neighbours=None, weights="uniform",
                 metric_params=None, n_jobs=-1):
        self.n_neighbours = n_neighbours
        self.mac_neighbours = mac_neighbours
        self.weights = weights
        # A literal ``{}`` default would be one dict shared by all instances;
        # create a new one per instance instead.
        self.metric_params = {} if metric_params is None else metric_params
        self.n_jobs = n_jobs

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict (``deep`` is unused)."""
        return {
            "n_neighbours": self.n_neighbours,
            "mac_neighbours": self.mac_neighbours,
            "weights": self.weights,
            "metric_params": self.metric_params,
            "n_jobs": self.n_jobs
        }

    def set_params(self, **parameters):
        """Set the given attributes on the instance and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X_train, y_train):
        """Store the training data and fit the Euclidean k-NN model.

        Returns
        -------
        self
        """
        self.X_train = X_train
        self.y_train = y_train
        self.model = KNeighborsTimeSeriesClassifier(
            n_neighbors=self.n_neighbours,
            metric="euclidean",
            weights=self.weights,
            n_jobs=self.n_jobs).fit(self.X_train, self.y_train)
        return self

    def predict(self, X_test):
        """Predict labels for ``X_test``.

        Returns
        -------
        The Euclidean model's ``predict`` output when ``mac_neighbours`` is
        ``None``; otherwise a list with one DTW-stage ``predict`` result per
        test sample.
        """
        if self.mac_neighbours is None:
            return self.model.predict(X_test)

        # Stage 1: Euclidean search for the candidate training samples.
        candidates = self.model.kneighbors(
            X_test, n_neighbors=self.mac_neighbours, return_distance=False)

        y_hat = []
        for idx, candidate_idx in enumerate(candidates):
            # Stage 2: DTW k-NN restricted to this sample's candidates.
            # A local variable is used on purpose: assigning to ``self.model``
            # here would discard the Euclidean model fitted in ``fit`` and
            # break every later ``predict`` call.
            # NOTE(review): indexing assumes X_train / y_train support
            # integer-array indexing (e.g. numpy arrays) - confirm at callers.
            reranker = KNeighborsTimeSeriesClassifier(
                n_neighbors=self.n_neighbours,
                metric="dtw",
                weights=self.weights,
                n_jobs=self.n_jobs,
                metric_params=self.metric_params).fit(
                    self.X_train[candidate_idx], self.y_train[candidate_idx])
            # Slice instead of index so the sample axis is kept and the
            # classifier receives an explicit one-sample batch.
            y_hat.append(reranker.predict(X_test[idx:idx + 1]))
        return y_hat
class AnomalyDetection(ClassifierMixin, BaseEstimator):
    """
    Anomaly detection with 1-NN and automatic calculation of optimal threshold.

    A 1-nearest-neighbor model is fitted on k-means centroids of the "good"
    samples (label 0). A sample is flagged as anomalous (1) when its distance
    to the nearest centroid is not below the learned threshold ``d``.
    """

    def __init__(self, n_clusters=200):
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=1,
                                                  weights='uniform',
                                                  metric='euclidean',
                                                  n_jobs=-1)
        # Distance threshold; set by ``fit``.
        self.d = None
        self.n_clusters = n_clusters

    def fit(self, X, y):
        """
        Fit the algorithm according to the given training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features, n_channels)
            Training samples.
        y : array-like of shape (n_samples,)
            True labels for X. Label 0 marks good samples; every other
            label is treated as bad.

        Returns
        -------
        self: object
            Fitted model
        """
        # Coerce labels so plain lists work with the element-wise comparison.
        y = np.asarray(y)
        is_good = y == 0
        X_good = X[is_good]
        X_bad = X[~is_good]

        # Fit anomaly detection knn over k-means centroids
        km = TimeSeriesKMeans(n_clusters=self.n_clusters,
                              metric="euclidean",
                              max_iter=100,
                              random_state=0,
                              n_jobs=-1).fit(X_good)
        self.knn.fit(km.cluster_centers_, np.zeros((self.n_clusters, )))

        # Calculate distances to all samples in good and bad
        d_bad, _ = self.knn.kneighbors(X_bad)
        d_good, _ = self.knn.kneighbors(X_good)

        # Calculate ROC; scores are flattened so roc_curve receives a 1-D
        # vector aligned with y_true (good samples first, then bad).
        y_true = np.hstack(
            (np.zeros(X_good.shape[0]), np.ones(X_bad.shape[0])))
        y_score = np.vstack((d_good, d_bad)).ravel()
        fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=1)

        # Determine d by Youden index (threshold maximizing tpr - fpr)
        self.d = thresholds[np.argmax(tpr - fpr)]
        return self

    def predict(self, X):
        """
        Perform a classification on samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features, n_channels)
            Test samples.

        Returns
        -------
        y_pred: array, shape (n_samples,)
            Predictions: 0 where the nearest-centroid distance is below the
            threshold, 1 otherwise.
        """
        distances, _ = self.knn.kneighbors(X)
        # ravel() rather than squeeze(): squeeze would collapse a
        # single-sample result to a 0-d array instead of shape (1,).
        y_pred = np.where(distances < self.d, 0, 1).ravel()
        return y_pred