def score(self, X: np.ndarray, batch_size: int = 64, return_predictions: bool = False) \
            -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """
        Compute adversarial scores.

        Parameters
        ----------
        X
            Batch of instances to analyze.
        batch_size
            Batch size used when computing scores.
        return_predictions
            Whether to return the predictions of the classifier on the original and reconstructed instances.

        Returns
        -------
        Array with adversarial scores for each instance in the batch, and optionally
        the model predictions on the original and reconstructed instances.
        """
        # reconstructed instances
        X_recon = predict_batch(self.ae, X, batch_size=batch_size)

        # model predictions
        y = predict_batch(self.model, X, batch_size=batch_size, proba=True)
        y_recon = predict_batch(self.model,
                                X_recon,
                                batch_size=batch_size,
                                proba=True)

        # scale predictions
        if self.temperature != 1.:
            y = y**(1 / self.temperature)
            y = y / tf.reshape(tf.reduce_sum(y, axis=-1), (-1, 1))

        adv_score = kld(y, y_recon).numpy()

        # hidden layer predictions
        if isinstance(self.model_hl, list):
            for m, w in zip(self.model_hl, self.w_model_hl):
                h = predict_batch(m, X, batch_size=batch_size, proba=True)
                h_recon = predict_batch(m,
                                        X_recon,
                                        batch_size=batch_size,
                                        proba=True)
                adv_score += w * kld(h, h_recon).numpy()

        if return_predictions:
            return adv_score, y, y_recon
        else:
            return adv_score
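Note: the temperature scaling above sharpens (T < 1) or flattens (T > 1) the predicted class distribution before the KL divergence between the original and reconstructed predictions is computed. A minimal, self-contained NumPy sketch of the same rescaling:

import numpy as np

def temperature_scale(p: np.ndarray, temperature: float) -> np.ndarray:
    # rescale probabilities p_i -> p_i**(1/T), then renormalize to sum to 1
    p_t = p ** (1. / temperature)
    return p_t / p_t.sum(axis=-1, keepdims=True)

p = np.array([[.7, .2, .1]])
print(temperature_scale(p, temperature=.5))  # sharper: ~[[.907, .074, .019]]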
Example #2
def preprocess_drift(
    X: np.ndarray,
    model: tf.keras.Model = None,
    tokenizer=None,
    max_len: int = None,
    batch_size: int = int(1e10)) -> np.ndarray:
    """
    Prediction function used for the preprocessing step of a drift detector.

    Parameters
    ----------
    X
        Batch of instances.
    model
        Model used for preprocessing.
    tokenizer
        Optional tokenizer for text drift.
    max_len
        Optional max token length for text drift.
    batch_size
        Batch size.

    Returns
    -------
    Numpy array with predictions.
    """
    if tokenizer is None:
        return predict_batch(model, X, batch_size=batch_size)
    else:
        return predict_batch_transformer(model,
                                         tokenizer,
                                         X,
                                         max_len,
                                         batch_size=batch_size)
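Note: in practice a function like this is bound with functools.partial so a drift detector can call it with only X; a usage sketch (encoder, X_ref and the detector class/kwarg names are assumptions that may vary by alibi-detect version):

from functools import partial

preprocess_fn = partial(preprocess_drift, model=encoder, batch_size=32)
# e.g. cd = KSDrift(X_ref, preprocess_fn=preprocess_fn)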
Example #3
    def score(self, X: np.ndarray, outlier_perc: float = 100., batch_size: int = int(1e10)) \
            -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute feature and instance level outlier scores.

        Parameters
        ----------
        X
            Univariate or multivariate time series.
        outlier_perc
            Percentage of sorted feature level outlier scores used to compute the instance level outlier score.
        batch_size
            Batch size used when making predictions with the seq2seq model.

        Returns
        -------
        Feature and instance level outlier scores.
        """
        # use the seq2seq model to reconstruct instances
        orig_shape = X.shape
        if len(orig_shape) == 2:
            X = X.reshape(self.shape)
        X_recon, threshold_est = predict_batch(self.seq2seq.decode_seq,
                                               X,
                                               batch_size=batch_size)

        if len(orig_shape) == 2:  # reshape back to original shape
            X = X.reshape(orig_shape)
            X_recon = X_recon.reshape(orig_shape)
            threshold_est = threshold_est.reshape(orig_shape)

        # compute feature and instance level scores
        fscore = self.feature_score(X, X_recon, threshold_est)
        iscore = self.instance_score(fscore, outlier_perc=outlier_perc)
        return fscore, iscore
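Note: a percentile-style aggregation like instance_score can be sketched as follows (illustrative only, not the library internals): sort each instance's feature level scores, keep the top outlier_perc percent and average them.

import numpy as np

def aggregate_scores(fscore: np.ndarray, outlier_perc: float = 100.) -> np.ndarray:
    # flatten feature scores per instance, sort descending, average the top perc
    fscore_flat = fscore.reshape(fscore.shape[0], -1)
    fscore_sorted = np.sort(fscore_flat, axis=1)[:, ::-1]
    n_keep = max(1, int(fscore_sorted.shape[1] * outlier_perc / 100.))
    return fscore_sorted[:, :n_keep].mean(axis=1)

fscore = np.random.rand(5, 10, 2)
print(aggregate_scores(fscore, outlier_perc=20.).shape)  # (5,)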
Example #4
    def score(self, X: np.ndarray, batch_size: int = int(1e10)) -> np.ndarray:
        """
        Compute outlier scores.

        Parameters
        ----------
        X
            Batch of instances to analyze.
        batch_size
            Batch size used when making predictions with the VAEGMM.

        Returns
        -------
        Array with outlier scores for each instance in the batch.
        """
        # repeat each instance and draw latent samples via the stochastic encoder
        X_samples = np.repeat(X, self.samples, axis=0)
        _, z, _ = predict_batch(self.vaegmm, X_samples, batch_size=batch_size)

        # compute average energy for samples
        energy, _ = gmm_energy(z,
                               self.phi,
                               self.mu,
                               self.cov,
                               self.L,
                               self.log_det_cov,
                               return_mean=False)
        energy_samples = energy.numpy().reshape((-1, self.samples))
        iscore = np.mean(energy_samples, axis=-1)
        return iscore
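Note: the np.repeat / reshape bookkeeping above scores `samples` stochastic copies of each instance and averages them per original instance; a self-contained toy check:

import numpy as np

X = np.arange(3).reshape(3, 1)            # 3 instances
samples = 4
X_rep = np.repeat(X, samples, axis=0)     # shape (12, 1): each instance 4x
energy = X_rep[:, 0].astype(float)        # stand-in for per-sample energies
print(energy.reshape(-1, samples).mean(axis=-1))  # [0. 1. 2.]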
Example #5
    def score(self, X: np.ndarray, outlier_perc: float = 100., batch_size: int = int(1e10)) \
            -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute feature and instance level outlier scores.

        Parameters
        ----------
        X
            Batch of instances.
        outlier_perc
            Percentage of sorted feature level outlier scores used to compute the instance level outlier score.
        batch_size
            Batch size used when making predictions with the VAE.

        Returns
        -------
        Feature and instance level outlier scores.
        """
        # sample reconstructed instances
        X_samples = np.repeat(X, self.samples, axis=0)
        X_recon = predict_batch(self.vae, X_samples, batch_size=batch_size)

        # compute feature and instance level scores
        fscore = self.feature_score(X_samples, X_recon)
        iscore = self.instance_score(fscore, outlier_perc=outlier_perc)

        return fscore, iscore
Example #6
def hidden_output(
    X: np.ndarray,
    model: tf.keras.Model = None,
    layer: int = -1,
    input_shape: tuple = None,
    batch_size: int = int(1e10)) -> np.ndarray:
    """
    Return hidden layer output from a model on a batch of instances.

    Parameters
    ----------
    X
        Batch of instances.
    model
        tf.keras.Model.
    layer
        Hidden layer of the model to use as output. The default of -1 refers to the last layer, typically the softmax output.
    input_shape
        Optional input layer shape.
    batch_size
        Batch size used for the model predictions.

    Returns
    -------
    Model predictions using the specified hidden layer as output layer.
    """
    if input_shape and not model.inputs:
        inputs = Input(shape=input_shape)
        model.call(inputs)
    else:
        inputs = model.inputs
    hidden_model = Model(inputs=inputs, outputs=model.layers[layer].output)
    X_hidden = predict_batch(hidden_model, X, batch_size=batch_size)
    return X_hidden
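Note: a minimal usage sketch on a toy classifier (hidden_output relies on the module's predict_batch; all other names are illustrative):

import numpy as np
import tensorflow as tf

clf = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(4,)),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])
X = np.random.rand(10, 4).astype(np.float32)
feats = hidden_output(X, model=clf, layer=-2)  # penultimate activations
print(feats.shape)  # (10, 8)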
Example #7
    def score(self, X: np.ndarray, batch_size: int = int(1e10), return_predictions: bool = False) \
            -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """
        Compute adversarial scores.

        Parameters
        ----------
        X
            Batch of instances to analyze.
        batch_size
            Batch size used when computing scores.
        return_predictions
            Whether to return the predictions of the classifier and the distilled model on the original instances.

        Returns
        -------
        Array with adversarial scores for each instance in the batch, and optionally
        the classifier and distilled model predictions.
        """

        # model predictions
        y = predict_batch(self.model, X, batch_size=batch_size, proba=True)
        y_distilled = predict_batch(self.distilled_model,
                                    X,
                                    batch_size=batch_size,
                                    proba=True)

        # scale predictions
        if self.temperature != 1.:
            y = y**(1 / self.temperature)  # type: ignore
            y = (y / tf.reshape(tf.reduce_sum(y, axis=-1), (-1, 1))).numpy()

        if self.loss_type == 'kld':
            score = kld(y, y_distilled).numpy()
        elif self.loss_type == 'xent':
            score = categorical_crossentropy(y, y_distilled).numpy()
        else:
            raise NotImplementedError

        if return_predictions:
            return score, y, y_distilled
        else:
            return score
Example #8
def test_predict_batch(update_predict_batch):
    model, proba, return_class, shape = update_predict_batch
    preds = predict_batch(model,
                          X,
                          proba=proba,
                          return_class=return_class,
                          shape=shape)
    if isinstance(model, AE):
        assert preds.shape == X.shape
    elif isinstance(model, tf.keras.Model) and proba:
        assert preds.shape == (n, n_classes)
    elif isinstance(model, tf.keras.Model) and not proba and return_class:
        assert preds.shape == (n, )
    elif isinstance(model, tf.keras.Model) and shape:
        assert preds.shape == shape
Example #9
def uae(X: np.ndarray,
        encoder_net: tf.keras.Sequential = None,
        enc_dim: int = None,
        batch_size: int = int(1e10)) -> np.ndarray:
    """
    Dimensionality reduction with an untrained autoencoder.

    Parameters
    ----------
    X
        Batch of instances.
    encoder_net
        Encoder network as a tf.keras.Sequential model.
    enc_dim
        Alternatively, only the dimension of the encoding can be provided and
        a default network with 2 hidden layers is constructed.
    batch_size
        Batch size used when making predictions with the autoencoder.

    Returns
    -------
    Encoded batch of instances.
    """
    is_tf_seq = isinstance(encoder_net, tf.keras.Sequential)
    is_enc_dim = isinstance(enc_dim, int)
    if not is_tf_seq and is_enc_dim:  # set default encoder
        input_dim = np.prod(X.shape[1:])
        step_dim = int((input_dim - enc_dim) / 3)
        encoder_net = tf.keras.Sequential([
            InputLayer(input_shape=X.shape[1:]),
            Flatten(),
            Dense(enc_dim + 2 * step_dim, activation=tf.nn.relu),
            Dense(enc_dim + step_dim, activation=tf.nn.relu),
            Dense(enc_dim, activation=None)
        ])
    elif not is_tf_seq and not is_enc_dim:
        raise ValueError(
            'Need to provide either `enc_dim` or a tf.keras.Sequential `encoder_net`.'
        )
    enc = EncoderAE(encoder_net)
    X_enc = predict_batch(enc, X, batch_size=batch_size)
    return X_enc
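Note: a sketch of the enc_dim branch (uae relies on the module's predict_batch and EncoderAE): with 32x32x3 inputs, input_dim = 3072 and step_dim = 1013, so the default encoder is Dense(2058) -> Dense(1045) -> Dense(32).

import numpy as np

X = np.random.rand(16, 32, 32, 3).astype(np.float32)
X_enc = uae(X, enc_dim=32)
print(X_enc.shape)  # (16, 32)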
Example #10
    def logp(self, dist, X: np.ndarray, return_per_feature: bool = False, batch_size: int = int(1e10)) \
            -> np.ndarray:
        """
        Compute log probability of a batch of instances under the generative model.

        Parameters
        ----------
        dist
            Distribution of the model.
        X
            Batch of instances.
        return_per_feature
            Return log probability per feature.
        batch_size
            Batch size for the generative model evaluations.

        Returns
        -------
        Log probabilities.
        """
        logp_fn = partial(dist.log_prob, return_per_feature=return_per_feature)
        return predict_batch(logp_fn, X, batch_size=batch_size)
Example #11
    def logp_alt(
        self,
        model: tf.keras.Model,
        X: np.ndarray,
        return_per_feature: bool = False,
        batch_size: int = int(1e10)
    ) -> np.ndarray:
        """
        Compute log probability of a batch of instances using the log_prob function
        defined by the user.

        Parameters
        ----------
        model
            Trained model.
        X
            Batch of instances.
        return_per_feature
            Return log probability per feature.
        batch_size
            Batch size for the generative model evaluations.

        Returns
        -------
        Log probabilities.
        """
        if self.sequential:
            y, X = X[:, 1:], X[:, :-1]
        else:
            y = X.copy()
        y_preds = predict_batch(model, X, batch_size=batch_size)
        logp = self.log_prob(y, y_preds).numpy()
        if return_per_feature:
            return logp
        else:
            axis = tuple(np.arange(len(logp.shape))[1:])
            return np.mean(logp, axis=axis)
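Note: the sequential branch sets up next-step prediction, with targets equal to the inputs shifted by one time step; a self-contained illustration of the slicing:

import numpy as np

X = np.arange(10).reshape(1, 10)   # one sequence of 10 steps
y, X_in = X[:, 1:], X[:, :-1]      # predict step t+1 from steps <= t
print(X_in)  # [[0 1 2 3 4 5 6 7 8]]
print(y)     # [[1 2 3 4 5 6 7 8 9]]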
Example #12
    def score(self, X: np.ndarray, batch_size: int = int(1e10)) -> np.ndarray:
        """
        Compute outlier scores.

        Parameters
        ----------
        X
            Batch of instances to analyze.
        batch_size
            Batch size used when making predictions with the AEGMM.

        Returns
        -------
        Array with outlier scores for each instance in the batch.
        """
        _, z, _ = predict_batch(self.aegmm, X, batch_size=batch_size)
        energy, _ = gmm_energy(z,
                               self.phi,
                               self.mu,
                               self.cov,
                               self.L,
                               self.log_det_cov,
                               return_mean=False)
        return energy.numpy()
Example #13
    def fit(self,
            X: np.ndarray,
            mutate_fn: Callable = mutate_categorical,
            mutate_fn_kwargs: dict = {
                'rate': .2,
                'seed': 0,
                'feature_range': (0, 255)
            },
            mutate_batch_size: int = int(1e10),
            loss_fn: tf.keras.losses = None,
            loss_fn_kwargs: dict = None,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(
                learning_rate=1e-3),
            epochs: int = 20,
            batch_size: int = 64,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None) -> None:
        """
        Train semantic and background generative models.

        Parameters
        ----------
        X
            Training batch.
        mutate_fn
            Mutation function used to generate the background dataset.
        mutate_fn_kwargs
            Kwargs for the mutation function used to generate the background dataset.
            Default values set for an image dataset.
        mutate_batch_size
            Batch size used to generate the mutations for the background dataset.
        loss_fn
            Loss function used for training.
        loss_fn_kwargs
            Kwargs for loss function.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress is displayed when verbose is True.
        callbacks
            Callbacks used during training.
        """
        input_shape = X.shape[1:]

        # training arguments
        kwargs = {
            'epochs': epochs,
            'batch_size': batch_size,
            'verbose': verbose,
            'callbacks': callbacks
        }

        # create background data
        mutate_fn = partial(mutate_fn, **mutate_fn_kwargs)
        X_back = predict_batch(mutate_fn,
                               X,
                               batch_size=mutate_batch_size,
                               shape=X.shape,
                               dtype=X.dtype)

        # prepare sequential data
        if self.sequential and not self.has_log_prob:
            y, y_back = X[:, 1:], X_back[:, 1:]  # type: ignore
            X, X_back = X[:, :-1], X_back[:, :-1]  # type: ignore
        else:
            y, y_back = None, None

        # check if model needs to be built
        use_build = self.has_log_prob and not isinstance(self.dist_s, tf.keras.Model)

        if use_build:
            # build and train semantic model
            self.model_s = build_model(self.dist_s, input_shape)[0]
            self.model_s.compile(optimizer=optimizer)
            self.model_s.fit(X, **kwargs)
            # build and train background model
            self.model_b = build_model(self.dist_b, input_shape)[0]
            self.model_b.compile(optimizer=optimizer)
            self.model_b.fit(X_back, **kwargs)
        else:
            # update training arguments
            kwargs.update({
                'optimizer': optimizer,
                'loss_fn_kwargs': loss_fn_kwargs,
                'log_metric': log_metric
            })

            # train semantic model
            args = [self.dist_s, loss_fn, X]
            kwargs.update({'y_train': y})
            trainer(*args, **kwargs)

            # train background model
            args = [self.dist_b, loss_fn, X_back]
            kwargs.update({'y_train': y_back})
            trainer(*args, **kwargs)
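Note: the background dataset is the training data with randomly perturbed feature values; a sketch of running just the mutation step, assuming mutate_categorical accepts the kwargs shown in the signature above:

from functools import partial
import numpy as np

mutate_fn = partial(mutate_categorical, rate=.2, seed=0, feature_range=(0, 255))
X = np.random.randint(0, 256, size=(64, 28, 28, 1))
X_back = predict_batch(mutate_fn, X, batch_size=int(1e10),
                       shape=X.shape, dtype=X.dtype)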