def decision_function(self, X):
        """Compute raw anomaly scores for X with the fitted detector.

        Each sample is passed through the fitted model and scored by the
        reconstruction error that ``pairwise_distances_no_broadcast``
        reports between the (optionally scaled) input and the model's
        prediction. For consistency, outliers are assigned larger scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples to score. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['model_', 'history_'])
        samples = check_array(X)

        # Apply the stored scaler when preprocessing is enabled; otherwise
        # work on a private copy of the validated input.
        scaled = (self.scaler_.transform(samples) if self.preprocessing
                  else np.copy(samples))

        # The reconstruction error is the anomaly score.
        reconstruction = self.model_.predict(scaled)
        return pairwise_distances_no_broadcast(scaled, reconstruction)
예제 #2
0
파일: DeepLog.py 프로젝트: zwbjtu123/tods
    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        X is converted into windowed inputs and targets by
        ``_preprocess_data_for_LSTM``. The model's predictions for the
        windows are compared against the targets with
        ``pairwise_distances_no_broadcast``. The first ``window_size`` rows
        have no prediction, so both compared arrays keep zeros there.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples to score. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['model_', 'history_'])

        X = check_array(X)
        X_norm, Y_norm = self._preprocess_data_for_LSTM(X)

        # Predictions start at row ``window_size``; earlier rows stay zero.
        pred_scores = np.zeros(X.shape)
        pred_scores[self.window_size:] = self.model_.predict(X_norm)

        # Align the targets with the predictions using the same zero padding.
        Y_norm_for_decision_scores = np.zeros(X.shape)
        Y_norm_for_decision_scores[self.window_size:] = Y_norm

        return pairwise_distances_no_broadcast(Y_norm_for_decision_scores,
                                               pred_scores)
예제 #3
0
파일: DeepLog.py 프로젝트: zwbjtu123/tods
    def fit(self, X, y=None):
        """Train the LSTM model on X and store the training outlier scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : optional (default=None)
            Passed to ``_set_n_classes``; not used for training here.

        Returns
        -------
        self : object
            The detector with ``model_``, ``history_`` and
            ``decision_scores_`` set.
        """
        X = check_array(X)
        self._set_n_classes(y)
        n_rows, n_cols = X.shape[0], X.shape[1]
        self.n_samples_ = n_rows
        self.n_features_ = n_cols

        # Windowed inputs and the targets the model learns to predict.
        windows, targets = self._preprocess_data_for_LSTM(X)

        self.model_ = self._build_model()
        training_run = self.model_.fit(
            windows,
            targets,
            epochs=self.epochs,
            batch_size=self.batch_size,
            validation_split=self.validation_size,
            verbose=self.verbose)
        self.history_ = training_run.history

        # The first ``window_size`` rows have no prediction; they stay zero
        # in both arrays being compared.
        offset = self.window_size
        predicted = np.zeros(X.shape)
        predicted[offset:] = self.model_.predict(windows)

        expected = np.zeros(X.shape)
        expected[offset:] = targets

        self.decision_scores_ = pairwise_distances_no_broadcast(
            expected, predicted)
        self._process_decision_scores()
        return self
예제 #4
0
    def fit(self, X, y=None, **kwargs):
        """Train the detector on X; y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).

        **kwargs
            Extra keyword arguments forwarded to ``self.model_.fit``.

        Returns
        -------
        self : object
            The fitted detector.
        """
        # Validate inputs X and y (optional).
        X = check_array(X)
        self._set_n_classes(y)

        n_rows, n_cols = X.shape[0], X.shape[1]
        self.n_samples_ = n_rows
        self.n_features_ = n_cols

        # Training copy: standardized when preprocessing is enabled.
        if not self.preprocessing:
            train_data = np.copy(X)
        else:
            self.scaler_ = StandardScaler()
            train_data = self.scaler_.fit_transform(X)

        # Shuffle up front because Keras does not shuffle before taking the
        # validation split.
        np.random.shuffle(train_data)

        # Reject encoder configurations wider than the input.
        smallest_layer = np.min(self.encoder_neurons)
        if smallest_layer > self.n_features_:
            raise ValueError("The number of neurons should not exceed "
                             "the number of features")

        # Build the VAE model and train it on the shuffled data.
        self.model_ = self._build_model()
        fit_result = self.model_.fit(train_data,
                                     epochs=self.epochs,
                                     batch_size=self.batch_size,
                                     shuffle=True,
                                     validation_split=self.validation_size,
                                     verbose=self.verbose,
                                     **kwargs)
        self.history_ = fit_result.history

        # The training copy was shuffled, so rebuild the data in the
        # original row order before scoring.
        scoring_data = (self.scaler_.transform(X) if self.preprocessing
                        else np.copy(X))

        reconstruction = self.model_.predict(scoring_data)
        self.decision_scores_ = pairwise_distances_no_broadcast(
            scoring_data, reconstruction)
        self._process_decision_scores()
        return self
예제 #5
0
파일: ae.py 프로젝트: Learn-Live/kjl_reload
    def decision_function(self, X):
        """Compute raw anomaly scores for X with the fitted detector.

        The score is the value ``pairwise_distances_no_broadcast`` returns
        for the validated input and the model's prediction for it. For
        consistency, outliers are assigned larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples to score.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        samples = check_array(X)
        return pairwise_distances_no_broadcast(samples,
                                               self.model_.predict(samples))
예제 #6
0
    def decision_function(self, X):
        """Compute raw anomaly scores for X with the fitted detector.

        X is passed to the model unchanged, and the reconstruction error
        between X and the model's prediction is returned. For consistency,
        outliers are assigned larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples to score. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        # The reconstruction error is the anomaly score.
        return pairwise_distances_no_broadcast(X, self.model_.predict(X))
예제 #7
0
    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        X is wrapped in a ``PyODDataset`` (with the stored ``mean`` and
        ``std``) and read in order through a ``DataLoader``. Each batch
        yields samples and their indices; the reconstruction error of every
        batch is written to those indices of the result. For consistency,
        outliers are assigned larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples to score. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        # Put the model in evaluation mode before scoring.
        self.model.eval()

        dataset = PyODDataset(X=X, mean=self.mean, std=self.std)
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=self.batch_size,
                                                 shuffle=False)

        # One score per input sample, filled in batch by batch.
        outlier_scores = np.zeros([X.shape[0], ])
        with torch.no_grad():
            # Iterate the loader built from X above, so the scores always
            # describe the samples passed to this call.
            for data, data_idx in dataloader:
                data_cuda = data.to(self.device).float()
                # The reconstruction error is the outlier score.
                outlier_scores[data_idx] = pairwise_distances_no_broadcast(
                    data,
                    self.model(data_cuda).cpu().numpy())

        return outlier_scores
예제 #8
0
    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        No standardization is applied in this variant; the validated input
        is used directly. The model receives a shuffled copy of X, while
        the stored ``decision_scores_`` are computed on X in its original
        row order, so they line up with the samples the caller passed in.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).

        Returns
        -------
        self : object
            The fitted detector.

        Raises
        ------
        ValueError
            If the smallest entry of ``hidden_neurons`` exceeds the number
            of features.
        """
        # Validate inputs X and y (optional).
        X = check_array(X)
        self._set_n_classes(y)

        self.n_samples_, self.n_features_ = X.shape[0], X.shape[1]

        # Validate the number of hidden neurons.
        if np.min(self.hidden_neurons) > self.n_features_:
            raise ValueError("The number of neurons should not exceed "
                             "the number of features")

        # Calculate the dimension of the encoding layer & compression rate.
        self.encoding_dim_ = np.median(self.hidden_neurons)
        self.compression_rate_ = self.n_features_ // self.encoding_dim_

        # Shuffle a private copy: np.random.shuffle works in place, and
        # shuffling X itself could reorder the caller's array and would
        # leave decision_scores_ out of step with the input order.
        X_shuffled = np.copy(X)
        np.random.shuffle(X_shuffled)

        # Build the model from the shuffled data (input and target are the
        # same array). NOTE(review): _build_model presumably also trains the
        # model, since no separate fit call follows; confirm.
        self.model_ = self._build_model(X_shuffled,
                                        X_shuffled,
                                        hidden_neurons=self.hidden_neurons)

        # Score X in its original order; the reconstruction error is the
        # outlier score.
        pred_scores = self.model_.predict(X)
        self.decision_scores_ = pairwise_distances_no_broadcast(X, pred_scores)
        self._process_decision_scores()
        return self
예제 #9
0
    def test_pairwise_distances_no_broadcast(self):
        # Distances for the fixture arrays must match the reference values.
        expected = [1.41421356, 2.23606798, 4.58257569, 4.12310563]
        actual = pairwise_distances_no_broadcast(self.X, self.Y)
        assert_allclose(actual, expected)

        # This pair of inputs is expected to be rejected with ValueError.
        with assert_raises(ValueError):
            pairwise_distances_no_broadcast([1, 2, 3], [6])
예제 #10
0
                          shuffle=True,
                          validation_split=0.1,
                          verbose=1).history
#%%
from sklearn.metrics.pairwise import euclidean_distances

# Print the model summary.
autoencoder.summary()
# Run the model on the X_train_norm and X_test_norm arrays.
pred_train = autoencoder.predict(X_train_norm)
pred_test = autoencoder.predict(X_test_norm)
#%%
from pyod.utils.stat_models import pairwise_distances_no_broadcast

# Earlier alternatives for computing the error, left disabled:
#error_train = euclidean_distances(X_train_norm, pred_train)
#error_test = cdist(X_test_norm, pred_test, metric='euclidean')

# Error between each input array and the model's prediction for it.
train_error = pairwise_distances_no_broadcast(X_train_norm, pred_train)
test_error = pairwise_distances_no_broadcast(X_test_norm, pred_test)

#%%
from __future__ import division
from __future__ import print_function

import os
import sys

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

from sklearn.utils import check_X_y
예제 #11
0
 def test_pairwise_distances_no_broadcast(self):
     # Distances for the fixture arrays must match the reference values.
     expected = [1.41421356, 2.23606798, 4.58257569, 4.12310563]
     actual = pairwise_distances_no_broadcast(self.X, self.Y)
     assert_allclose(actual, expected)
    def fit(self, X, y=None, model_path='./model.h5', log_path='./logs'):
        """Fit detector. y is optional for unsupervised methods.

        Trains the autoencoder with early stopping, model checkpointing and
        TensorBoard logging, then stores the reconstruction error of X as
        ``decision_scores_``.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).

        model_path : str, optional (default='./model.h5')
            File path handed to ``ModelCheckpoint``.

        log_path : str, optional (default='./logs')
            Directory handed to ``TensorBoard``.

        Returns
        -------
        self : object
            The fitted detector.

        Raises
        ------
        ValueError
            If the smallest entry of ``hidden_neurons`` exceeds the number
            of features.
        """
        # Validate inputs X and y (optional).
        X = check_array(X)
        self._set_n_classes(y)

        self.n_samples_, self.n_features_ = X.shape[0], X.shape[1]

        # Standardize data for better performance.
        if self.preprocessing:
            self.scaler_ = StandardScaler()
            X_norm = self.scaler_.fit_transform(X)
        else:
            X_norm = np.copy(X)

        # Shuffle up front because Keras does not shuffle before taking the
        # validation split.
        np.random.shuffle(X_norm)

        # Validate the number of hidden neurons.
        if np.min(self.hidden_neurons) > self.n_features_:
            raise ValueError("The number of neurons should not exceed "
                             "the number of features")

        # Temporarily prepend the input width to the layer list (presumably
        # read by _build_model). The ``finally`` below removes it again even
        # if building or training fails, so repeated fit attempts do not
        # accumulate extra entries.
        self.hidden_neurons_.insert(0, self.n_features_)
        try:
            # Calculate the dimension of the encoding layer & compression
            # rate.
            self.encoding_dim_ = np.median(self.hidden_neurons)
            self.compression_rate_ = self.n_features_ // self.encoding_dim_

            # Build AE model & fit with X.
            self.model_ = self._build_model()

            # NOTE(review): monitoring 'f1' assumes the compiled model logs
            # a metric with that name; confirm against _build_model.
            es = EarlyStopping(monitor='f1', mode='max', verbose=1,
                               patience=25)

            cp = ModelCheckpoint(filepath=model_path,
                                 save_best_only=True,
                                 verbose=0)

            tb = TensorBoard(log_dir=log_path,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)

            print('Model Save Path:' + str(model_path))
            print('Logs Path:' + str(log_path))
            print('')

            self.history_ = self.model_.fit(
                X_norm, X_norm,
                epochs=self.epochs,
                batch_size=self.batch_size,
                shuffle=True,
                validation_split=self.validation_size,
                callbacks=[cp, tb, es],
                verbose=self.verbose).history
        finally:
            # Reverse the operation for consistency.
            self.hidden_neurons_.pop(0)

        # X_norm was shuffled for training, so rebuild it in the original
        # row order before computing the outlier scores.
        if self.preprocessing:
            X_norm = self.scaler_.transform(X)
        else:
            X_norm = np.copy(X)

        # The reconstruction error on X itself is the outlier score.
        pred_scores = self.model_.predict(X_norm)
        self.decision_scores_ = pairwise_distances_no_broadcast(X_norm,
                                                                pred_scores)
        self._process_decision_scores()
        return self
예제 #13
0
# Forward pass over X_train on the GPU; the result is not stored here.
best_model(torch.from_numpy(X_train).float().cuda())

# %%
# Switch the model to evaluation mode before scoring.
best_model.eval()

# Score buffer with n_train entries (presumably one per training sample).
X_reconst = np.zeros([
    n_train,
])
# Gradients are not needed while scoring.
with torch.no_grad():
    # NOTE(review): the enumerate counter ``idx`` is overwritten by batch[1]
    # below, so the counter itself is never used.
    for idx, batch in enumerate(train_loader):
        # print(epoch, data.shape)
        # batch[0] holds the samples; move them to the GPU as float.
        data = batch[0].cuda().float()
        # batch[1] gives the positions in X_reconst for this batch's scores.
        idx = batch[1]
        # this is the outlier score
        X_reconst[idx] = pairwise_distances_no_broadcast(
            batch[0],
            best_model(data).cpu().numpy())

# %%


class AutoEncoder(BaseDetector):
    def __init__(
            self,
            hidden_neurons=None,
            # hidden_activation='relu',
            # output_activation='sigmoid',
            batch_norm=True,
            # loss='mse',
            # optimizer='adam',
            learning_rate=1e-3,
예제 #14
0
    def fit(self, X, y=None, **kwargs):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        **kwargs
            Extra keyword arguments forwarded to ``self.model_.fit``.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If the smallest entry of ``hidden_neurons`` exceeds the number
            of features.
        """
        # Validate inputs X and y (optional).
        X = check_array(X)
        self._set_n_classes(y)

        self.n_samples_, self.n_features_ = X.shape[0], X.shape[1]

        # Standardize data for better performance.
        if self.preprocessing:
            self.scaler_ = StandardScaler()
            X_norm = self.scaler_.fit_transform(X)
        else:
            X_norm = np.copy(X)

        # Shuffle up front because Keras does not shuffle before taking the
        # validation split.
        np.random.shuffle(X_norm)

        # Validate the number of hidden neurons.
        if np.min(self.hidden_neurons) > self.n_features_:
            raise ValueError("The number of neurons should not exceed "
                             "the number of features")

        # Temporarily prepend the input width to the layer list (presumably
        # read by _build_model). The ``finally`` below removes it again even
        # if building or training fails, so repeated fit attempts do not
        # accumulate extra entries.
        self.hidden_neurons_.insert(0, self.n_features_)
        try:
            # Calculate the dimension of the encoding layer & compression
            # rate.
            self.encoding_dim_ = np.median(self.hidden_neurons)
            self.compression_rate_ = self.n_features_ // self.encoding_dim_

            # Build AE model & fit with X.
            self.model_ = self._build_model()
            self.history_ = self.model_.fit(
                X_norm,
                X_norm,
                epochs=self.epochs,
                batch_size=self.batch_size,
                shuffle=True,
                validation_split=self.validation_size,
                verbose=self.verbose,
                **kwargs).history
        finally:
            # Reverse the operation for consistency.
            self.hidden_neurons_.pop(0)

        # X_norm was shuffled for training, so rebuild it in the original
        # row order before computing the outlier scores.
        if self.preprocessing:
            X_norm = self.scaler_.transform(X)
        else:
            X_norm = np.copy(X)

        # The reconstruction error on X itself is the outlier score.
        pred_scores = self.model_.predict(X_norm)
        self.decision_scores_ = pairwise_distances_no_broadcast(
            X_norm, pred_scores)
        self._process_decision_scores()
        return self
예제 #15
0
    def test_pairwise_distances_no_broadcast(self):
        # Distances for the fixture arrays must match the reference values.
        expected = [1.41421356, 2.23606798, 4.58257569, 4.12310563]
        actual = pairwise_distances_no_broadcast(self.X, self.Y)
        assert_allclose(actual, expected)

        # This pair of inputs is expected to be rejected with ValueError.
        with assert_raises(ValueError):
            pairwise_distances_no_broadcast([1, 2, 3], [6])