def decision_function(self, X): """Predict raw anomaly score of X using the fitted detector. The anomaly score of an input sample is computed based on different detector algorithms. For consistency, outliers are assigned with larger anomaly scores. Parameters ---------- X : numpy array of shape (n_samples, n_features) The training input samples. Sparse matrices are accepted only if they are supported by the base estimator. Returns ------- anomaly_scores : numpy array of shape (n_samples,) The anomaly score of the input samples. """ check_is_fitted(self, ['model_', 'history_']) X = check_array(X) if self.preprocessing: X_norm = self.scaler_.transform(X) else: X_norm = np.copy(X) # Predict on X and return the reconstruction errors pred_scores = self.model_.predict(X_norm) return pairwise_distances_no_broadcast(X_norm, pred_scores)
def decision_function(self, X): """Predict raw anomaly score of X using the fitted detector. The anomaly score of an input sample is computed based on different detector algorithms. . Parameters ---------- X : numpy array of shape (n_samples, n_features) The training input samples. Sparse matrices are accepted only if they are supported by the base estimator. Returns ------- anomaly_scores : numpy array of shape (n_samples,) The anomaly score of the input samples. """ check_is_fitted(self, ['model_', 'history_']) X = check_array(X) print("inside") print(X.shape) print(X[0]) X_norm, Y_norm = self._preprocess_data_for_LSTM(X) pred_scores = np.zeros(X.shape) pred_scores[self.window_size:] = self.model_.predict(X_norm) Y_norm_for_decision_scores = np.zeros(X.shape) Y_norm_for_decision_scores[self.window_size:] = Y_norm return pairwise_distances_no_broadcast(Y_norm_for_decision_scores, pred_scores)
def fit(self, X, y=None): """ Fit data to LSTM model. Args: inputs : X , ndarray of size (number of sample,features) Returns: return : self object with trained model """ X = check_array(X) self._set_n_classes(y) self.n_samples_, self.n_features_ = X.shape[0], X.shape[1] X_train, Y_train = self._preprocess_data_for_LSTM(X) self.model_ = self._build_model() self.history_ = self.model_.fit(X_train, Y_train, epochs=self.epochs, batch_size=self.batch_size, validation_split=self.validation_size, verbose=self.verbose).history pred_scores = np.zeros(X.shape) pred_scores[self.window_size:] = self.model_.predict(X_train) Y_train_for_decision_scores = np.zeros(X.shape) Y_train_for_decision_scores[self.window_size:] = Y_train self.decision_scores_ = pairwise_distances_no_broadcast( Y_train_for_decision_scores, pred_scores) self._process_decision_scores() return self
def fit(self, X, y=None, **kwargs): """Fit detector. y is optional for unsupervised methods. Parameters ---------- X : numpy array of shape (n_samples, n_features) The input samples. y : numpy array of shape (n_samples,), optional (default=None) The ground truth of the input samples (labels). """ # validate inputs X and y (optional) X = check_array(X) self._set_n_classes(y) # Verify and construct the hidden units self.n_samples_, self.n_features_ = X.shape[0], X.shape[1] # Standardize data for better performance if self.preprocessing: self.scaler_ = StandardScaler() X_norm = self.scaler_.fit_transform(X) else: X_norm = np.copy(X) # Shuffle the data for validation as Keras do not shuffling for # Validation Split np.random.shuffle(X_norm) # Validate and complete the number of hidden neurons if np.min(self.encoder_neurons) > self.n_features_: raise ValueError("The number of neurons should not exceed " "the number of features") # Build VAE model & fit with X self.model_ = self._build_model() self.history_ = self.model_.fit(X_norm, epochs=self.epochs, batch_size=self.batch_size, shuffle=True, validation_split=self.validation_size, verbose=self.verbose, **kwargs).history # Predict on X itself and calculate the reconstruction error as # the outlier scores. Noted X_norm was shuffled has to recreate if self.preprocessing: X_norm = self.scaler_.transform(X) else: X_norm = np.copy(X) pred_scores = self.model_.predict(X_norm) self.decision_scores_ = pairwise_distances_no_broadcast( X_norm, pred_scores) self._process_decision_scores() return self
def decision_function(self, X): """Predict raw anomaly score of X using the fitted detector. For consistency, outliers are assigned with larger anomaly scores. Parameters ---------- X : numpy array of shape (n_samples, n_features) The training input samples. Returns ------- anomaly_scores : numpy array of shape (n_samples,) The anomaly score of the input samples. """ X = check_array(X) pred_scores = self.model_.predict(X) return pairwise_distances_no_broadcast(X, pred_scores)
def decision_function(self, X): """Predict raw anomaly score of X using the fitted detector. The anomaly score of an input sample is computed based on different detector algorithms. For consistency, outliers are assigned with larger anomaly scores. Parameters ---------- X : numpy array of shape (n_samples, n_features) The training input samples. Sparse matrices are accepted only if they are supported by the base estimator. Returns ------- anomaly_scores : numpy array of shape (n_samples,) The anomaly score of the input samples. """ # Predict on X and return the reconstruction errors pred_scores = self.model_.predict(X) return pairwise_distances_no_broadcast(X, pred_scores)
def decision_function(self, X): """Predict raw anomaly score of X using the fitted detector. The anomaly score of an input sample is computed based on different detector algorithms. For consistency, outliers are assigned with larger anomaly scores. Parameters ---------- X : numpy array of shape (n_samples, n_features) The training input samples. Sparse matrices are accepted only if they are supported by the base estimator. Returns ------- anomaly_scores : numpy array of shape (n_samples,) The anomaly score of the input samples. """ self.model.eval() dataset = PyODDataset(X=X, mean=self.mean, std=self.std) dataloader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=False) X_reconst = np.zeros([ X.shape[0], ]) with torch.no_grad(): for data, data_idx in train_loader: # print(epoch, data.shape) data_cuda = data.to(self.device).float() # idx = batch[1] # this is the outlier score X_reconst[data_idx] = pairwise_distances_no_broadcast( data, self.model(data_cuda).cpu().numpy()) return X_reconst
def fit(self, X, y=None): """Fit detector. y is optional for unsupervised methods. Parameters ---------- X : numpy array of shape (n_samples, n_features) The input samples. y : numpy array of shape (n_samples,), optional (default=None) The ground truth of the input samples (labels). """ # validate inputs X and y (optional) X = check_array(X) self._set_n_classes(y) # Verify and construct the hidden units self.n_samples_, self.n_features_ = X.shape[0], X.shape[1] # # Standardize data for better performance # if self.preprocessing: # self.scaler_ = StandardScaler() # X_norm = self.scaler_.fit_transform(X) # else: # X_norm = np.copy(X) # Shuffle the data for validation as Keras do not shuffling for # Validation Split np.random.shuffle(X) # Validate and complete the number of hidden neurons if np.min(self.hidden_neurons) > self.n_features_: raise ValueError("The number of neurons should not exceed " "the number of features") # self.hidden_neurons_.insert(0, self.n_features_) # Calculate the dimension of the encoding layer & compression rate self.encoding_dim_ = np.median(self.hidden_neurons) self.compression_rate_ = self.n_features_ // self.encoding_dim_ # # Build AE ndm & fit with X self.model_ = self._build_model(X, X, hidden_neurons=self.hidden_neurons) # self.history_ = self.model_.fit(X_norm, X_norm, # epochs=self.epochs, # batch_size=self.batch_size, # shuffle=True, # validation_split=self.validation_size, # verbose=self.verbose).history # # Reverse the operation for consistency # # self.hidden_neurons_.pop(0) # # Predict on X itself and calculate the reconstruction error as # # the outlier scores. Noted X_norm was shuffled has to recreate # if self.preprocessing: # X_norm = self.scaler_.transform(X) # else: # X_norm = np.copy(X) pred_scores = self.model_.predict(X) self.decision_scores_ = pairwise_distances_no_broadcast(X, pred_scores) self._process_decision_scores() return self
def test_pairwise_distances_no_broadcast(self):
    assert_allclose(pairwise_distances_no_broadcast(self.X, self.Y),
                    [1.41421356, 2.23606798, 4.58257569, 4.12310563])

    with assert_raises(ValueError):
        pairwise_distances_no_broadcast([1, 2, 3], [6])
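# For reference, `pairwise_distances_no_broadcast(X, Y)` returns the row-wise
# Euclidean distance between two equal-shaped arrays, i.e. one score per
# sample rather than a full pairwise matrix. A minimal NumPy sketch of that
# behaviour, using a hypothetical helper name (the library version performs
# additional input validation):
import numpy as np

def rowwise_euclidean(X, Y):
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if X.shape != Y.shape:
        raise ValueError("X and Y must have the same shape")
    # distance between the i-th row of X and the i-th row of Y
    return np.sqrt(np.sum(np.square(X - Y), axis=1))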
                          shuffle=True,
                          validation_split=0.1,
                          verbose=1).history

#%%
from sklearn.metrics.pairwise import euclidean_distances

autoencoder.summary()
pred_train = autoencoder.predict(X_train_norm)
pred_test = autoencoder.predict(X_test_norm)

#%%
from pyod.utils.stat_models import pairwise_distances_no_broadcast

# error_train = euclidean_distances(X_train_norm, pred_train)
# error_test = cdist(X_test_norm, pred_test, metric='euclidean')
train_error = pairwise_distances_no_broadcast(X_train_norm, pred_train)
test_error = pairwise_distances_no_broadcast(X_test_norm, pred_test)

#%%
from __future__ import division
from __future__ import print_function

import os
import sys

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

from sklearn.utils import check_X_y
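# One common way to turn the reconstruction errors above into binary outlier
# labels is a percentile cut-off driven by an assumed contamination rate.
# The 10% figure below is illustrative and not taken from the original script:
import numpy as np

contamination = 0.1  # assumed fraction of outliers in the training data
threshold = np.percentile(train_error, 100 * (1 - contamination))

# samples whose reconstruction error exceeds the training threshold are
# flagged as outliers (1), the rest as inliers (0)
train_labels = (train_error > threshold).astype(int)
test_labels = (test_error > threshold).astype(int)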
def test_pairwise_distances_no_broadcast(self):
    assert_allclose(pairwise_distances_no_broadcast(self.X, self.Y),
                    [1.41421356, 2.23606798, 4.58257569, 4.12310563])
def fit(self, X, y=None, model_path='./model.h5', log_path='./logs'):
    """Fit detector. y is optional for unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : numpy array of shape (n_samples,), optional (default=None)
        The ground truth of the input samples (labels).

    model_path : str, optional (default='./model.h5')
        Path where the best model checkpoint is saved.

    log_path : str, optional (default='./logs')
        Directory used for the TensorBoard logs.
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    # Verify and construct the hidden units
    self.n_samples_, self.n_features_ = X.shape[0], X.shape[1]

    # Standardize data for better performance
    if self.preprocessing:
        self.scaler_ = StandardScaler()
        X_norm = self.scaler_.fit_transform(X)
    else:
        X_norm = np.copy(X)

    # Shuffle the data for validation as Keras does not shuffle the
    # validation split
    np.random.shuffle(X_norm)

    # Validate and complete the number of hidden neurons
    if np.min(self.hidden_neurons) > self.n_features_:
        raise ValueError("The number of neurons should not exceed "
                         "the number of features")

    self.hidden_neurons_.insert(0, self.n_features_)

    # Calculate the dimension of the encoding layer & compression rate
    self.encoding_dim_ = np.median(self.hidden_neurons)
    self.compression_rate_ = self.n_features_ // self.encoding_dim_

    # Build AE model & fit with X
    self.model_ = self._build_model()

    # Note: EarlyStopping can only monitor a metric that actually appears
    # in the training logs, so 'f1' has to be provided as a custom metric.
    es = EarlyStopping(monitor='f1', mode='max', verbose=1, patience=25)
    cp = ModelCheckpoint(filepath=model_path, save_best_only=True,
                         verbose=0)
    tb = TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True,
                     write_images=True)

    print('Model Save Path: ' + str(model_path))
    print('Logs Path: ' + str(log_path))
    print('')

    self.history_ = self.model_.fit(X_norm, X_norm,
                                    epochs=self.epochs,
                                    batch_size=self.batch_size,
                                    shuffle=True,
                                    validation_split=self.validation_size,
                                    callbacks=[cp, tb, es],
                                    verbose=self.verbose).history

    # Reverse the operation for consistency
    self.hidden_neurons_.pop(0)

    # Predict on X itself and calculate the reconstruction error as
    # the outlier scores. Note: X_norm was shuffled, so it has to be
    # recreated from X before scoring.
    if self.preprocessing:
        X_norm = self.scaler_.transform(X)
    else:
        X_norm = np.copy(X)

    pred_scores = self.model_.predict(X_norm)
    self.decision_scores_ = pairwise_distances_no_broadcast(X_norm,
                                                            pred_scores)
    self._process_decision_scores()
    return self
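# An illustrative call of the `fit` variant above, which also writes a Keras
# checkpoint and TensorBoard logs. The detector name, the paths and the
# arrays X_train / X_test are placeholders, not taken from the original code:
clf = AutoEncoder(epochs=50, verbose=1)
clf.fit(X_train, model_path='./ae_best.h5', log_path='./ae_logs')

# the checkpoint keeps the best model seen during training; the logs can be
# inspected with `tensorboard --logdir ./ae_logs`
test_scores = clf.decision_function(X_test)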
best_model(torch.from_numpy(X_train).float().cuda())

# %%
best_model.eval()

X_reconst = np.zeros([n_train, ])
with torch.no_grad():
    for batch in train_loader:
        data = batch[0].cuda().float()
        idx = batch[1]
        # this is the outlier score
        X_reconst[idx] = pairwise_distances_no_broadcast(
            batch[0], best_model(data).cpu().numpy())

# %%
class AutoEncoder(BaseDetector):
    def __init__(
            self,
            hidden_neurons=None,
            # hidden_activation='relu',
            # output_activation='sigmoid',
            batch_norm=True,
            # loss='mse',
            # optimizer='adam',
            learning_rate=1e-3,
def fit(self, X, y=None, **kwargs): """Fit detector. y is ignored in unsupervised methods. Parameters ---------- X : numpy array of shape (n_samples, n_features) The input samples. y : Ignored Not used, present for API consistency by convention. Returns ------- self : object Fitted estimator. """ # validate inputs X and y (optional) X = check_array(X) self._set_n_classes(y) # Verify and construct the hidden units self.n_samples_, self.n_features_ = X.shape[0], X.shape[1] # Standardize data for better performance if self.preprocessing: self.scaler_ = StandardScaler() X_norm = self.scaler_.fit_transform(X) else: X_norm = np.copy(X) # Shuffle the data for validation as Keras do not shuffling for # Validation Split np.random.shuffle(X_norm) # Validate and complete the number of hidden neurons if np.min(self.hidden_neurons) > self.n_features_: raise ValueError("The number of neurons should not exceed " "the number of features") self.hidden_neurons_.insert(0, self.n_features_) # Calculate the dimension of the encoding layer & compression rate self.encoding_dim_ = np.median(self.hidden_neurons) self.compression_rate_ = self.n_features_ // self.encoding_dim_ # Build AE model & fit with X self.model_ = self._build_model() self.history_ = self.model_.fit(X_norm, X_norm, epochs=self.epochs, batch_size=self.batch_size, shuffle=True, validation_split=self.validation_size, verbose=self.verbose, **kwargs).history # Reverse the operation for consistency self.hidden_neurons_.pop(0) # Predict on X itself and calculate the reconstruction error as # the outlier scores. Noted X_norm was shuffled has to recreate if self.preprocessing: X_norm = self.scaler_.transform(X) else: X_norm = np.copy(X) pred_scores = self.model_.predict(X_norm) self.decision_scores_ = pairwise_distances_no_broadcast( X_norm, pred_scores) self._process_decision_scores() return self