def score(self, X: np.ndarray, batch_size: int = 64, return_predictions: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Compute adversarial scores.

    The score of an instance is the K-L divergence between the model output on the
    instance and the model output on its reconstruction by `self.ae`. If `self.model_hl`
    is a list, the K-L divergence between the outputs of each of those models on the
    original and reconstructed instances is added, weighted by `self.w_model_hl`.

    Parameters
    ----------
    X
        Batch of instances to analyze.
    batch_size
        Batch size used when computing scores.
    return_predictions
        Whether to return the predictions of the classifier on the original and reconstructed instances.

    Returns
    -------
    Array with adversarial scores for each instance in the batch. If `return_predictions`
    is True, a tuple with the scores, the (temperature scaled) predictions on `X` and the
    predictions on the reconstructed instances is returned instead.
    """
    # reconstructed instances
    X_recon = predict_batch(self.ae, X, batch_size=batch_size)

    # model predictions
    y = predict_batch(self.model, X, batch_size=batch_size, proba=True)
    y_recon = predict_batch(self.model, X_recon, batch_size=batch_size, proba=True)

    # scale predictions; only the predictions on the original instances are rescaled
    if self.temperature != 1.:
        y = y ** (1 / self.temperature)
        # renormalise each row so it sums to 1 again; the division yields a tf tensor,
        # so convert back to numpy to honour the annotated np.ndarray return type
        y = (y / tf.reshape(tf.reduce_sum(y, axis=-1), (-1, 1))).numpy()
    adv_score = kld(y, y_recon).numpy()

    # hidden layer predictions
    if isinstance(self.model_hl, list):
        for m, w in zip(self.model_hl, self.w_model_hl):
            h = predict_batch(m, X, batch_size=batch_size, proba=True)
            h_recon = predict_batch(m, X_recon, batch_size=batch_size, proba=True)
            adv_score += w * kld(h, h_recon).numpy()

    if return_predictions:
        return adv_score, y, y_recon
    return adv_score
def score(self, X: np.ndarray) -> np.ndarray:
    """
    Compute adversarial scores.

    Every instance is repeated `self.samples` times, each copy is reconstructed by
    `self.vae`, and the K-L divergences between the model outputs on the copies and on
    their reconstructions are averaged per instance.

    Parameters
    ----------
    X
        Batch of instances to analyze.

    Returns
    -------
    Array with adversarial scores for each instance in the batch.
    """
    n_samples = self.samples

    # repeat each instance so that several reconstructions are drawn for it
    X_repeated = np.repeat(X, n_samples, axis=0)
    X_repeated_recon = self.vae(X_repeated)

    # model outputs on the repeated instances and on their reconstructions
    preds = self.model(X_repeated)
    preds_recon = self.model(X_repeated_recon)

    # one divergence per repeated instance, grouped back into one row per original instance
    divergence = kld(preds, preds_recon).numpy()
    return divergence.reshape(-1, n_samples).mean(axis=1)
def loss_distillation(
        x_true: tf.Tensor,
        y_pred: tf.Tensor,
        model: tf.keras.Model = None,
        loss_type: str = 'kld',
        temperature: float = 1.,
) -> tf.Tensor:
    """
    Loss function used for Model Distillation.

    The output of `model` on `x_true` serves as the target for the distilled model's
    predictions `y_pred`; the per-instance losses are averaged over the batch.

    Parameters
    ----------
    x_true
        Batch of data points.
    y_pred
        Batch of predictions from the distilled model.
    model
        tf.keras model.
    loss_type
        Type of loss for distillation. Supported: 'kld' and 'xent'.
    temperature
        Temperature used for model prediction scaling.
        Temperature <1 sharpens the prediction probability distribution.

    Returns
    -------
    Loss value.

    Raises
    ------
    NotImplementedError
        If `loss_type` is neither 'kld' nor 'xent'.
    """
    target = model(x_true)

    # temperature scaling: raise to the power 1/T and renormalise each row to sum to 1
    if temperature != 1.:
        target = target ** (1 / temperature)
        row_sums = tf.reshape(tf.reduce_sum(target, axis=-1), (-1, 1))
        target = target / row_sums

    # average the selected per-instance loss over the batch
    if loss_type == 'kld':
        return tf.reduce_mean(kld(target, y_pred))
    if loss_type == 'xent':
        return tf.reduce_mean(categorical_crossentropy(target, y_pred, from_logits=False))
    raise NotImplementedError
def score(self, X: np.ndarray, batch_size: int = int(1e10), return_predictions: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Compute adversarial scores.

    The score compares the output of `self.model` on `X` with the output of
    `self.distilled_model` on `X`, using the loss selected by `self.loss_type`.

    Parameters
    ----------
    X
        Batch of instances to analyze.
    batch_size
        Batch size used when computing scores.
    return_predictions
        Whether to return the predictions of the classifier and of the distilled model
        alongside the scores.

    Returns
    -------
    Array with adversarial scores for each instance in the batch. If `return_predictions`
    is True, a tuple with the scores, the (temperature scaled) predictions of the model
    and the predictions of the distilled model is returned instead.

    Raises
    ------
    NotImplementedError
        If `self.loss_type` is neither 'kld' nor 'xent'.
    """
    # predictions of the original and the distilled model
    preds = predict_batch(self.model, X, batch_size=batch_size, proba=True)
    preds_distilled = predict_batch(self.distilled_model, X, batch_size=batch_size, proba=True)

    # temperature scaling is applied to the original model's predictions only
    if self.temperature != 1.:
        preds = preds ** (1 / self.temperature)  # type: ignore
        row_sums = tf.reshape(tf.reduce_sum(preds, axis=-1), (-1, 1))
        preds = (preds / row_sums).numpy()

    if self.loss_type == 'kld':
        adv_score = kld(preds, preds_distilled).numpy()
    elif self.loss_type == 'xent':
        adv_score = categorical_crossentropy(preds, preds_distilled).numpy()
    else:
        raise NotImplementedError

    return (adv_score, preds, preds_distilled) if return_predictions else adv_score
def loss_adv_vae(x_true: tf.Tensor,
                 x_pred: tf.Tensor,
                 model: tf.keras.Model = None,
                 w_model: float = 1.,
                 w_recon: float = 0.,
                 cov_full: tf.Tensor = None,
                 cov_diag: tf.Tensor = None,
                 sim: float = .05
                 ) -> tf.Tensor:
    """
    Loss function used for AdversarialVAE.

    The loss is the weighted batch mean of the K-L divergence between the model outputs
    on the original and on the reconstructed instances. A weighted `elbo` term is added
    when `w_recon` is positive.

    Parameters
    ----------
    x_true
        Batch of instances.
    x_pred
        Batch of reconstructed instances by the variational autoencoder.
    model
        A trained tf.keras model with frozen layers (layers.trainable = False).
    w_model
        Weight on model prediction loss term.
    w_recon
        Weight on elbo loss term.
    cov_full
        Full covariance matrix.
    cov_diag
        Diagonal (variance) of covariance matrix.
    sim
        Scale identity multiplier.

    Returns
    -------
    Loss value.
    """
    preds_true = model(x_true)
    preds_recon = model(x_pred)
    model_term = w_model * tf.reduce_mean(kld(preds_true, preds_recon))

    # the elbo term is only evaluated when it carries a positive weight
    if not w_recon > 0.:
        return model_term

    recon_term = w_recon * elbo(x_true, x_pred, cov_full=cov_full, cov_diag=cov_diag, sim=sim)
    return model_term + recon_term
def loss_adv_ae(x_true: tf.Tensor,
                x_pred: tf.Tensor,
                model: tf.keras.Model = None,
                model_hl: list = None,
                w_model: float = 1.,
                w_recon: float = 0.,
                w_model_hl: list = None,
                temperature: float = 1.) -> tf.Tensor:
    """
    Loss function used for AdversarialAE.

    The loss is the batch mean of the K-L divergence between the model outputs on the
    original and on the reconstructed instances, plus optional weighted K-L terms from
    the models in `model_hl`, all multiplied by `w_model`. When `w_recon` is positive a
    squared-error reconstruction term is added.

    Parameters
    ----------
    x_true
        Batch of instances.
    x_pred
        Batch of reconstructed instances by the autoencoder.
    model
        A trained tf.keras model with frozen layers (layers.trainable = False).
    model_hl
        List with tf.keras models used to extract feature maps and make predictions on hidden layers.
    w_model
        Weight on model prediction loss term.
    w_recon
        Weight on MSE reconstruction error loss term.
    w_model_hl
        Weights assigned to the loss of each model in model_hl. Defaults to a weight of
        one per model when `model_hl` is a list and no weights are given.
    temperature
        Temperature used for model prediction scaling.
        Temperature <1 sharpens the prediction probability distribution.

    Returns
    -------
    Loss value.
    """
    preds_true = model(x_true)
    preds_recon = model(x_pred)

    # temperature scaling is applied to the predictions on the original instances only
    if temperature != 1.:
        preds_true = preds_true ** (1 / temperature)
        row_sums = tf.reshape(tf.reduce_sum(preds_true, axis=-1), (-1, 1))
        preds_true = preds_true / row_sums

    # per-instance K-L divergence; its spread is reused to scale the reconstruction term
    kld_per_instance = kld(preds_true, preds_recon)
    kld_std = tf.math.reduce_std(kld_per_instance)
    total = tf.reduce_mean(kld_per_instance)

    # optional K-L divergences computed on the outputs of the hidden layer models
    if isinstance(model_hl, list):
        hl_weights = list(tf.ones(len(model_hl))) if w_model_hl is None else w_model_hl
        for hl_model, hl_weight in zip(model_hl, hl_weights):
            feat_true = hl_model(x_true)
            feat_recon = hl_model(x_pred)
            total += tf.constant(hl_weight) * tf.reduce_mean(kld(feat_true, feat_recon))
    total *= w_model

    if not w_recon > 0.:
        return total

    # squared reconstruction error, rescaled by the ratio of the K-L std to the error std
    sq_err = (x_true - x_pred) ** 2
    sq_err_std = tf.math.reduce_std(sq_err)
    scale = kld_std / (sq_err_std + 1e-10)
    return total + w_recon * scale * tf.reduce_mean(sq_err)