def score(self, X):
    """Score each test sample by its quadratic distance to the fitted mean.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    anomaly_score : ndarray
        One entry per sample. The value is the raw quadratic form
        ``(x - mean) @ cov_inv @ (x - mean)`` (or its one-feature
        equivalent); no F-distribution scaling is applied to it.
    """
    # validation
    X = check_array_type(X)

    scores = []
    for sample in X:
        deviation = sample - self.mean_val
        if self.M > 1:
            # Multivariate case: full quadratic form with the inverse covariance.
            value = deviation @ self.cov_val_inv @ deviation
        elif self.M == 1:
            # Single feature: squared deviation scaled by the inverse variance.
            value = deviation**2 * self.cov_val_inv
        scores.append(value)
    return np.asarray(scores)
def fit(self, y, threshold):
    """Estimate the normal-state statistics and the shift to be detected.

    Parameters
    ----------
    y : array-like, shape (n_samples, )
        Normal measured values, where n_samples is the number of samples.

    threshold : float
        Level stored as ``error_mean``; the shift ``nu`` is its difference
        from the mean of ``y``.

    Returns
    -------
    self : object
    """
    # validation
    y = check_array_type(y)
    check_array_feature_dimension(y, 1)

    self.normal_mean = np.mean(y)
    self.normal_std = np.std(y)
    self.error_mean = threshold
    self.nu = self.error_mean - self.normal_mean
    # The (misspelled) attribute name is part of the object's state and is
    # kept unchanged. True means the shift is towards larger values.
    self.uppper = True if self.nu > 0 else False
    return self
def fit(self, X, normalize=True):
    """Store the column-wise mean of the training data as a column vector.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Normal measured vectors, where n_samples is the number of samples
        and n_features is the number of features.

    normalize : bool
        If True, pass X through ``tensor_normalize(X, axis=1)`` first
        (presumably per-sample normalization -- confirm against the helper).

    Returns
    -------
    self : object
    """
    # validation
    X = check_array_type(X)

    data = tensor_normalize(X, axis=1) if normalize else X
    # Mean over samples, reshaped to (n_features, 1).
    self.mean_val = data.mean(axis=0).reshape(-1, 1)
    return self
def fit(self, X):
    """Fit the Hotelling's t-squared model to the training data.

    Stores the sample count ``N``, feature count ``M``, the per-feature
    mean ``mean_val`` and the inverse (co)variance ``cov_val_inv``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Normal measured vectors, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If X has no feature columns.
    """
    # validation
    X = check_array_type(X)

    n_samples, n_features = X.shape
    self.N, self.M = n_samples, n_features
    self.mean_val = X.mean(axis=0)

    if n_features == 1:
        # One feature: the "inverse covariance" is the reciprocal variance.
        self.cov_val_inv = np.array([1 / np.var(X)])
    elif n_features > 1:
        # Biased (divide-by-N) covariance with features in columns.
        covariance = np.cov(X, rowvar=False, bias=True)
        self.cov_val_inv = np.linalg.inv(covariance)
    else:
        raise ValueError("Input shape is incorrect")
    return self
def fit(self, X, rho=0.01, normalize=True):
    """Fit the model to the training data via ``self._solve``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Normal measured vectors, where n_samples is the number of samples
        and n_features is the number of features.

    rho : float
        Regularization setting forwarded unchanged to ``self._solve``.

    normalize : bool
        Forwarded unchanged to ``self._solve``; if True the input array is
        normalized there.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If X has fewer than two feature columns.
    """
    # validation
    X = check_array_type(X)

    n_features = X.shape[1]
    self.feature_size = n_features
    if n_features <= 1:
        raise ValueError(f"Feature size must be >=2")

    solution = self._solve(X, rho=rho, normalize=normalize)
    # _solve yields: precision matrix, its inverse, covariance, best loss.
    self.pmatrix, self.pmatrix_inv, self.cov, self.best_loss = solution
    return self
def outlier_analysis_score(self, X):
    """Calculate a per-feature anomaly score for every test sample.

    For precision matrix ``P`` (``self.pmatrix``) with diagonal ``d`` the
    score of sample ``x`` for feature ``i`` is
    ``log(2*pi / d_i) / 2 + (x @ P)_i**2 / (2 * d_i)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    anomaly_score : ndarray, shape (n_samples, n_features)
        Anomaly score of each feature of each sample.

    Raises
    ------
    ValueError
        If the number of columns differs from the training data.
    """
    # validation
    X = check_array_type(X)
    if self.feature_size != X.shape[1]:
        raise ValueError("Feature size must be same as training data")

    diag = np.diag(self.pmatrix)
    # NOTE(review): the matrix product below was restored from a corrupted
    # expression in the source text (an e-mail-obfuscation artifact had
    # replaced "x @ self.pmatrix") -- confirm against version control.
    # Row i of ``X @ self.pmatrix`` equals ``X[i] @ self.pmatrix``, so all
    # samples are scored in one vectorized expression.
    projected = X @ self.pmatrix
    return np.log(2 * np.pi / diag) / 2 + projected**2 / (2 * diag)
def score(self, X):
    """Calculate the anomaly score ``1 - X @ mean_val`` for the test data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    anomaly_score : ndarray
        One score per sample; the exact shape follows the shape of
        ``self.mean_val`` under matrix multiplication.
    """
    # validation
    X = check_array_type(X)

    # NOTE(review): the matrix product was restored from a corrupted
    # expression in the source text (an e-mail-obfuscation artifact had
    # replaced "X @ self.mean") -- confirm against version control.
    anomaly_score = 1 - X @ self.mean_val
    return anomaly_score
def anomaly_analysis_score(self, X, rho=0.01, normalize=True):
    """Calculate an anomaly score for each feature from the given test data.

    A second model is solved on X via ``self._solve`` and compared with the
    fitted one through the diagonals of the precision matrices ``P`` and of
    ``P @ S @ P`` (``S`` being the matching covariance). The results of the
    second solve are stored on the instance as ``pmatrix_new``,
    ``pmatrix_inv_new``, ``cov_new`` and ``best_loss_new``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    rho : float
        Regularization setting forwarded unchanged to ``self._solve``.

    normalize : bool
        Forwarded unchanged to ``self._solve``; if True the input array is
        normalized there.

    Returns
    -------
    anomaly_score : ndarray
        One value per feature (built from matrix diagonals).

    precision_matrix : array-like, shape (n_features, n_features)
        Precision matrix solved from the test vectors.

    Raises
    ------
    ValueError
        If the number of columns differs from the training data.
    """
    # validation
    X = check_array_type(X)
    if self.feature_size != X.shape[1]:
        raise ValueError("Feature size must be same as training data")

    (self.pmatrix_new, self.pmatrix_inv_new,
     self.cov_new, self.best_loss_new) = self._solve(
        X, rho=rho, normalize=normalize)

    diag = np.diag(self.pmatrix).reshape(-1)
    diag_new = np.diag(self.pmatrix_new).reshape(-1)
    # NOTE(review): both triple products were restored from corrupted
    # expressions in the source text (an e-mail-obfuscation artifact had
    # swallowed the first two operands). The operands chosen here are
    # inferred from the surviving fragments -- confirm against version
    # control.
    diag_S = np.diag(self.pmatrix @ self.cov @ self.pmatrix).reshape(-1)
    diag_S_new = np.diag(
        self.pmatrix_new @ self.cov_new @ self.pmatrix_new).reshape(-1)

    a = np.log(diag / diag_new) / 2 - (diag_S / diag - diag_S_new / diag_new) / 2
    return a, self.pmatrix_new
def score(self, y_test, cumsum_on=True):
    """Calculate the anomaly score for the given test data.

    The per-sample score is the Gaussian log-likelihood ratio between a
    mean shifted by ``nu`` and the normal mean:
    ``nu * (y - normal_mean - nu / 2) / normal_std**2``. The expression is
    valid for either sign of ``nu``, so upward and downward shifts share it.

    Parameters
    ----------
    y_test : array-like, shape (n_samples,)
        Test values, where n_samples is the number of samples.

    cumsum_on : bool
        If True, return the running sum of the score, reset to zero
        whenever it would become negative. If False, return the
        per-sample score itself.

    Returns
    -------
    anomaly_score : ndarray
        One entry per sample.
    """
    # validation
    y_test = check_array_type(y_test)
    check_array_feature_dimension(y_test, 1)

    # With a signed nu this single formula covers both directions; for
    # nu > 0 it is exactly the previous upper-side expression. The former
    # lower-side branch combined a negative nu with a formula written for a
    # positive magnitude and therefore scored upward deviations.
    raw_score = self.nu * \
        (y_test - self.normal_mean - self.nu / 2) / self.normal_std**2

    if not cumsum_on:
        return raw_score

    running = 0
    cumulative = []
    for step in raw_score:
        # Build a new value rather than using ``+=``: when ``step`` is an
        # array view (2-D input) an in-place add would overwrite raw_score.
        running = np.maximum(running + step, 0)
        cumulative.append(running)
    return np.array(cumulative)
def fit(self, y, window_size=50, trajectory_n=25, trajectory_pattern=3,
        test_n=25, test_pattern=2, lag=25):
    """Compute a change score for the series by comparing SVD subspaces.

    For every time index a "trajectory" matrix of past sliding windows and
    a "test" matrix of lagged sliding windows are built; the score is one
    minus the largest singular value of ``U.T @ Q``, where U and Q hold the
    leading left singular vectors of the two matrices. The resulting array
    is stored on the instance in the private ``__score`` attribute.

    Parameters
    ----------
    y : array-like, shape (n_samples,)
        Measured values, where n_samples is the number of samples.

    window_size : int
        Length of each sliding window (column length of both matrices).

    trajectory_n : int
        Number of windows in the trajectory matrix.

    trajectory_pattern : int
        Number of left singular vectors kept from the trajectory matrix.

    test_n : int
        Number of windows in the test matrix.

    test_pattern : int
        Number of left singular vectors kept from the test matrix.

    lag : int
        Offset, in windows, between the trajectory and the test matrix.

    Returns
    -------
    self : object
    """
    assert window_size < len(y) + 1
    assert trajectory_pattern <= window_size
    assert test_pattern <= window_size
    assert trajectory_n >= 1
    assert test_n >= 1
    assert 0 <= lag < len(y) - window_size - test_n - 1

    # validation
    y = check_array_type(y)
    check_array_feature_dimension(y, 1)
    y = y.reshape(-1)

    n_points = len(y)
    # Row k is the window y[k : k + window_size].
    windows = np.asarray([
        y[start:start + window_size]
        for start in range(n_points - window_size - 1)
    ])

    scores = []
    for t in range(window_size + test_n + 1, n_points - lag):
        past_end = t - window_size
        test_end = past_end + lag

        # trajectory matrix and test matrix at t (windows as columns)
        trajectory = windows[past_end - trajectory_n:past_end].T
        test = windows[test_end - test_n:test_end].T

        # Leading left singular vectors of each matrix.
        U = np.linalg.svd(trajectory)[0][:, :trajectory_pattern]
        Q = np.linalg.svd(test)[0][:, :test_pattern]

        overlap = np.dot(U.T, Q)
        singular_values = np.linalg.svd(overlap)[1]
        change = 1 - singular_values[0]

        # Clamp tiny (including slightly negative) values to exactly zero.
        scores.append(0 if change < 1e-9 else change)

    self.__score = np.array(scores)
    return self