def score_trust(self, data_test, labels_pred, is_train=False):
    """
    Compute the trust score of every test sample with respect to its classifier-predicted label.

    The score is non-negative, and a larger value corresponds to a higher level of trust in the
    classifier's prediction.

    :param data_test: numpy array with the test data of shape `(n, d)`, where `n` and `d` are the number of
                      samples and the dimension respectively.
    :param labels_pred: numpy array of the classifier-predicted labels for the samples in `data_test`.
                        Should have shape `(n, )`.
    :param is_train: Set to True if this same data was also passed to the `fit` method. In that case the
                     scores stored in `self.scores_estim` are returned and the other inputs are not used.

    :return: numpy array of trust scores for each test sample.
    """
    if is_train:
        return self.scores_estim

    reducer = self.model_dim_reduction
    samples = transform_data_from_model(data_test, reducer) if reducer else data_test

    # Column `col` holds the distance from each test sample to its nearest neighbor in the KNN index of
    # the class at position `col` of `self.labels_unique`
    dist_to_level_sets = np.zeros((samples.shape[0], self.n_classes))
    for col, label in enumerate(self.labels_unique):
        nearest_dist = self.index_knn[label].query(samples, k=1)[1]
        dist_to_level_sets[:, col] = nearest_dist[:, 0]

    # The actual trust score is derived from these distances by the helper
    return self._score_helper(dist_to_level_sets, labels_pred)
def transform_layer_embeddings(embeddings_in, transform_models):
    """
    Apply the per-layer dimension reduction model to the data embeddings of every layer.

    The embeddings of layer `i` are passed to `transform_data_from_model` together with
    `transform_models[i]`, and the results are collected in layer order.

    NOTE: In order to skip dimension reduction at a particular layer, the corresponding element of
    `transform_models` can be set to `None`. Thus, a list of `None` values can be passed to completely skip
    dimension reduction. (This relies on how `transform_data_from_model` treats a `None` model.)

    :param embeddings_in: list of data embeddings per layer. `embeddings_in[i]` must expose a 2D `shape`,
                          since its second dimension is logged as the input dimension of layer `i`.
    :param transform_models: A list with the transformation model of each layer. Its length should be equal
                             to the length of `embeddings_in`.

    :return: list of transformed data arrays, one per layer.
    """
    n_layers = len(embeddings_in)
    assert len(transform_models) == n_layers, ("Length of 'transform_models' is not equal to the length of "
                                               "'embeddings_in'")
    embeddings_out = []
    for layer_num, (layer_data, layer_model) in enumerate(zip(embeddings_in, transform_models), start=1):
        logger.info("Transforming the embeddings from layer {:d}:".format(layer_num))
        projected = transform_data_from_model(layer_data, layer_model)
        embeddings_out.append(projected)
        logger.info("Input dimension = {:d}, projected dimension = {:d}".format(layer_data.shape[1],
                                                                                projected.shape[1]))

    return embeddings_out
def score(self, layer_embeddings, labels_pred, cleanup=True):
    """
    Given a list of layer embeddings for test samples, extract the layer-wise LID feature vector and return
    the decision function of the logistic classifier.

    :param layer_embeddings: list of numpy arrays with the layer embeddings of the test samples. Length of
                             the list should be equal to `self.n_layers`. The numpy array at index `i` has
                             shape `(n, d_i)`, where `n` is the number of samples and `d_i` is the dimension
                             of the embeddings at layer `i`.
    :param labels_pred: numpy array with the predicted class labels for the samples in `layer_embeddings`.
    :param cleanup: If set to True, the temporary directory where the KNN index files are saved will be
                    deleted after scoring. If this method is to be called multiple times, set
                    `cleanup = False` for all calls except the last one. Only has an effect when
                    `self.save_knn_indices_to_file` is set.

    :return: numpy array of detection scores for the test samples. Has shape `(n, )` where `n` is the number
             of samples. Larger values correspond to a higher confidence that the sample is adversarial.
    :raises ValueError: if the number of layers in `layer_embeddings` is not equal to `self.n_layers`.
    """
    # Local import: only needed for the optional cleanup step below
    import shutil

    # Validate the number of layers before indexing into the list, so that an empty input gives the
    # descriptive error below instead of an `IndexError`
    n_layers_in = len(layer_embeddings)
    if n_layers_in != self.n_layers:
        raise ValueError("Expecting {:d} layers in the input 'layer_embeddings', but received {:d} layers.".
                         format(self.n_layers, n_layers_in))

    n_test = layer_embeddings[0].shape[0]
    features_lid = np.zeros((n_test, self.n_layers))
    for i in range(self.n_layers):
        logger.info("Calculating LID features for layer {:d}".format(i + 1))
        if self.transform_models:
            # Dimension reduction
            data_proj = transform_data_from_model(layer_embeddings[i], self.transform_models[i])
        else:
            data_proj = layer_embeddings[i]

        if self.save_knn_indices_to_file:
            # Load the per-class KNN indices of this layer from file.
            # NOTE(review): `pickle.load` executes arbitrary code from the file. This is presumably the file
            # written by `fit` into its own temporary directory; make sure that location is trusted.
            with open(self.temp_knn_files[i], 'rb') as fp:
                self.index_knn[i] = pickle.load(fp)

        for c in self.labels_unique:
            # Samples predicted into class `c` are queried against the KNN index of class `c`
            ind = np.where(labels_pred == c)[0]
            if ind.shape[0]:
                _, nn_distances = self.index_knn[i][c].query(data_proj[ind, :], k=self.n_neighbors_per_class[c])
                features_lid[ind, i] = lid_mle_amsaleg(nn_distances)

        if self.save_knn_indices_to_file:
            # Free up the allocated memory
            self.index_knn[i] = None

    if cleanup and self.save_knn_indices_to_file:
        # Portable replacement for `rm -rf`: a directory that is already gone is not an error, but any
        # other failure to delete is still raised
        try:
            shutil.rmtree(self.temp_direc)
        except FileNotFoundError:
            pass

    # Features not assigned above (samples whose predicted class is not in `self.labels_unique`) stay at 0
    features_lid = self.scaler.transform(features_lid)
    return self.model_logistic.decision_function(features_lid)
def fit(self, layer_embeddings_normal, labels_normal, labels_pred_normal,
        layer_embeddings_adversarial, labels_pred_adversarial,
        layer_embeddings_noisy=None, labels_pred_noisy=None):
    """
    Extract the LID feature vector for normal, noisy, and adversarial samples and train a logistic classifier
    to separate adversarial samples from (normal + noisy). Cross-validation is used to select the
    hyper-parameter `C` using area under the ROC curve as the validation metric.

    NOTE: True labels and predicted labels are required for the normal feature embeddings. Only predicted
    labels are needed for the noisy and adversarial feature embeddings.

    :param layer_embeddings_normal: list of numpy arrays with the layer embeddings for normal samples.
                                    Length of the list is equal to the number of layers. The numpy array at
                                    index `i` has shape `(n, d_i)`, where `n` is the number of samples and
                                    `d_i` is the dimension of the embeddings at layer `i`.
    :param labels_normal: numpy array of class labels for the normal samples. Should have shape `(n, )`,
                          where `n` is the number of normal samples.
    :param labels_pred_normal: numpy array of DNN classifier predictions for the normal samples. Should have
                               the same shape as `labels_normal`.
    :param layer_embeddings_adversarial: Same format as `layer_embeddings_normal`, but corresponding to
                                         the adversarial samples.
    :param labels_pred_adversarial: numpy array of DNN classifier predictions for the adversarial samples.
                                    Should have shape `(n, )`, where `n` is the number of adversarial
                                    samples.
    :param layer_embeddings_noisy: Same format as `layer_embeddings_normal`, but corresponding to the noisy
                                   samples. Can be set to `None` to exclude noisy data from training.
    :param labels_pred_noisy: numpy array of DNN classifier predictions for the noisy samples. Should have
                              shape `(n, )`, where `n` is the number of noisy samples. Required whenever
                              `layer_embeddings_noisy` is provided.

    :return: (self, scores_normal, scores_adversarial) if `layer_embeddings_noisy` is None
             (self, scores_normal, scores_adversarial, scores_noisy) otherwise.
        -------------------------------------------------------
        - self: trained instance of the class.
        - scores_normal: numpy array with the scores (decision function of the logistic classifier) for
                         normal samples. 1d array with the same number of samples as
                         `layer_embeddings_normal`.
        - scores_adversarial: scores corresponding to `layer_embeddings_adversarial`.
        - scores_noisy: scores corresponding to `layer_embeddings_noisy` if noisy training data is provided.
    :raises ValueError: if the inputs are inconsistent in the number of layers or the number of samples, or
                        if noisy embeddings are given without their class predictions.
    """
    self.n_layers = len(layer_embeddings_normal)
    logger.info("Number of layer embeddings: {:d}.".format(self.n_layers))
    noisy_data = layer_embeddings_noisy is not None
    if noisy_data:
        cond1 = (len(layer_embeddings_noisy) != self.n_layers)
        if labels_pred_noisy is None:
            raise ValueError("Class predictions are not provided for the noisy data")
    else:
        logger.info("Noisy training data not provided.")
        cond1 = False

    if cond1 or (len(layer_embeddings_adversarial) != self.n_layers):
        raise ValueError("The layer embeddings for noisy and attack samples must have the same length as that "
                         "of normal samples")

    if labels_normal.shape != labels_pred_normal.shape:
        raise ValueError("Length of arrays 'labels_normal' and 'labels_pred_normal' is not equal")

    # Number of samples in each of the categories: [normal, noisy, adversarial]
    self.n_samples = [
        layer_embeddings_normal[0].shape[0],
        layer_embeddings_noisy[0].shape[0] if noisy_data else 0,
        layer_embeddings_adversarial[0].shape[0]
    ]
    # Distinct class labels
    self.labels_unique = np.unique(labels_normal)
    for c in self.labels_unique:
        # Normal labeled samples from class `c`
        self.indices_true[c] = np.where(labels_normal == c)[0]
        # Normal samples predicted into class `c`
        self.indices_pred_normal[c] = np.where(labels_pred_normal == c)[0]
        # Adversarial samples predicted into class `c`
        self.indices_pred_adver[c] = np.where(labels_pred_adversarial == c)[0]
        if noisy_data:
            # Noisy samples predicted into class `c`
            self.indices_pred_noisy[c] = np.where(labels_pred_noisy == c)[0]

        # Number of nearest neighbors per class
        if self.n_neighbors is None:
            # Set based on the number of samples from this class and the neighborhood constant
            self.n_neighbors_per_class[c] = \
                int(np.ceil(self.indices_true[c].shape[0] ** self.neighborhood_constant))
        else:
            # Use the value specified as input
            self.n_neighbors_per_class[c] = self.n_neighbors

    # The data arrays at all layers should have the same number of samples
    if not all(arr.shape[0] == self.n_samples[0] for arr in layer_embeddings_normal):
        raise ValueError("Input 'layer_embeddings_normal' does not have the expected format")

    if noisy_data:
        if not all(arr.shape[0] == self.n_samples[1] for arr in layer_embeddings_noisy):
            raise ValueError("Input 'layer_embeddings_noisy' does not have the expected format")

    if not all(arr.shape[0] == self.n_samples[2] for arr in layer_embeddings_adversarial):
        raise ValueError("Input 'layer_embeddings_adversarial' does not have the expected format")

    if self.save_knn_indices_to_file:
        # Create a temporary directory for saving the KNN indices
        self.temp_direc = tempfile.mkdtemp(dir=os.getcwd())
        self.temp_knn_files = [''] * self.n_layers

    # KNN indices for the layer embeddings from each layer and each class
    self.index_knn = [dict() for _ in range(self.n_layers)]
    # One LID feature per layer. The noisy array has zero rows when noisy data is not provided
    features_lid_normal = np.zeros((self.n_samples[0], self.n_layers))
    features_lid_noisy = np.zeros((self.n_samples[1], self.n_layers))
    features_lid_adversarial = np.zeros((self.n_samples[2], self.n_layers))
    for i in range(self.n_layers):
        logger.info("Processing layer {:d}:".format(i + 1))
        # Dimensionality reduction of the layer embeddings, if required
        if self.transform_models:
            data_normal = transform_data_from_model(layer_embeddings_normal[i], self.transform_models[i])
            data_adver = transform_data_from_model(layer_embeddings_adversarial[i], self.transform_models[i])
            if noisy_data:
                data_noisy = transform_data_from_model(layer_embeddings_noisy[i], self.transform_models[i])
            else:
                data_noisy = None

            d1 = layer_embeddings_normal[i].shape[1]
            d2 = data_normal.shape[1]
            if d2 < d1:
                logger.info("Input dimension = {:d}, projected dimension = {:d}".format(d1, d2))
        else:
            data_normal = layer_embeddings_normal[i]
            data_adver = layer_embeddings_adversarial[i]
            data_noisy = layer_embeddings_noisy[i] if noisy_data else None

        for c in self.labels_unique:
            k_c = self.n_neighbors_per_class[c]
            logger.info("Building a KNN index on the feature embeddings of normal samples from class {}".
                        format(c))
            self.index_knn[i][c] = KNNIndex(
                data_normal[self.indices_true[c], :], n_neighbors=k_c,
                metric=self.metric, metric_kwargs=self.metric_kwargs,
                approx_nearest_neighbors=self.approx_nearest_neighbors,
                n_jobs=self.n_jobs,
                low_memory=self.low_memory,
                seed_rng=self.seed_rng
            )
            logger.info("Calculating LID estimates for the normal, noisy, and adversarial layer embeddings "
                        "predicted into class {}".format(c))
            # Distance to nearest neighbors of all labeled samples from class `c`
            _, nn_distances_temp = self.index_knn[i][c].query_self(k=k_c)

            n_pred_normal = self.indices_pred_normal[c].shape[0]
            n_pred_adver = self.indices_pred_adver[c].shape[0]
            n_pred_noisy = self.indices_pred_noisy[c].shape[0] if noisy_data else 0

            if n_pred_normal:
                # Distance to nearest neighbors of samples predicted into class `c` that are also labeled as
                # class `c`. These samples will be a part of the KNN index
                nn_distances = helper_knn_distance(self.indices_pred_normal[c], self.indices_true[c],
                                                   nn_distances_temp)
                # Rows with a negative first distance are treated as not yet filled by the helper
                mask = (nn_distances[:, 0] < 0.)
                if np.any(mask):
                    # Distance to nearest neighbors of samples predicted into class `c` that are not labeled
                    # as class `c`. These samples will not be a part of the KNN index
                    ind_comp = self.indices_pred_normal[c][mask]
                    _, temp_arr = self.index_knn[i][c].query(data_normal[ind_comp, :], k=k_c)
                    nn_distances[mask, :] = temp_arr

                # LID estimates for the normal feature embeddings predicted into class `c`
                features_lid_normal[self.indices_pred_normal[c], i] = lid_mle_amsaleg(nn_distances)

            # LID estimates for the noisy feature embeddings predicted into class `c`
            if n_pred_noisy:
                temp_arr = data_noisy[self.indices_pred_noisy[c], :]
                _, nn_distances = self.index_knn[i][c].query(temp_arr, k=k_c)
                features_lid_noisy[self.indices_pred_noisy[c], i] = lid_mle_amsaleg(nn_distances)

            # LID estimates for the adversarial feature embeddings predicted into class `c`
            if n_pred_adver:
                temp_arr = data_adver[self.indices_pred_adver[c], :]
                _, nn_distances = self.index_knn[i][c].query(temp_arr, k=k_c)
                features_lid_adversarial[self.indices_pred_adver[c], i] = lid_mle_amsaleg(nn_distances)

        if self.save_knn_indices_to_file:
            logger.info("Saving the KNN indices per class from layer {:d} to a pickle file".format(i + 1))
            self.temp_knn_files[i] = os.path.join(self.temp_direc, 'knn_indices_layer_{:d}.pkl'.format(i + 1))
            with open(self.temp_knn_files[i], 'wb') as fp:
                pickle.dump(self.index_knn[i], fp)

            # Free up the allocated memory
            self.index_knn[i] = None

    # LID feature vectors and labels for the binary logistic classifier.
    # Normal and noisy samples are given label 0 and adversarial samples are given label 1.
    # `features_lid_noisy` has zero rows when there is no noisy data, so a single concatenation covers both
    # cases. The builtin `int` is used as the dtype because the `np.int` alias was removed from NumPy.
    n_pos = features_lid_adversarial.shape[0]
    features_lid = np.concatenate([features_lid_normal, features_lid_noisy, features_lid_adversarial], axis=0)
    labels_bin = np.concatenate([np.zeros(features_lid_normal.shape[0], dtype=int),
                                 np.zeros(features_lid_noisy.shape[0], dtype=int),
                                 np.ones(n_pos, dtype=int)])
    pos_prop = n_pos / float(labels_bin.shape[0])

    # Randomly shuffle the samples to avoid determinism
    ind_perm = np.random.permutation(labels_bin.shape[0])
    features_lid = features_lid[ind_perm, :]
    labels_bin = labels_bin[ind_perm]

    # Min-max scaling for the LID features
    self.scaler = MinMaxScaler().fit(features_lid)
    features_lid = self.scaler.transform(features_lid)
    logger.info("Training a binary logistic classifier with {:d} samples and {:d} LID features.".
                format(*features_lid.shape))
    logger.info("Using {:d}-fold cross-validation with area under ROC curve as the metric to select "
                "the best regularization hyperparameter.".format(self.n_cv_folds))
    logger.info("Proportion of positive (adversarial or OOD) samples in the training data: {:.4f}".
                format(pos_prop))
    class_weight = None
    if self.balanced_classification:
        # Re-weight the classes only when they are noticeably imbalanced
        if (pos_prop < 0.45) or (pos_prop > 0.55):
            class_weight = {0: 1.0 / (1 - pos_prop),
                            1: 1.0 / pos_prop}
            logger.info("Balancing the classes by assigning sample weight {:.4f} to class 0 and sample weight "
                        "{:.4f} to class 1".format(class_weight[0], class_weight[1]))

    # NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn releases; confirm against
    # the version pinned by the project before upgrading.
    self.model_logistic = LogisticRegressionCV(
        Cs=self.c_search_values,
        cv=self.n_cv_folds,
        penalty='l2',
        scoring='roc_auc',
        multi_class='auto',
        class_weight=class_weight,
        max_iter=self.max_iter,
        refit=True,
        n_jobs=self.n_jobs,
        random_state=self.seed_rng
    ).fit(features_lid, labels_bin)

    # Larger values of this score correspond to a higher probability of predicting class 1 (adversarial)
    scores_normal = self.model_logistic.decision_function(self.scaler.transform(features_lid_normal))
    scores_adversarial = self.model_logistic.decision_function(self.scaler.transform(features_lid_adversarial))
    if noisy_data:
        scores_noisy = self.model_logistic.decision_function(self.scaler.transform(features_lid_noisy))
        return self, scores_normal, scores_adversarial, scores_noisy

    return self, scores_normal, scores_adversarial
def score(self, layer_embeddings, labels_pred, return_corrected_predictions=False, start_layer=0,
          test_layer_pairs=True, is_train=False):
    """
    Score test samples on how likely they are to be adversarial or out-of-distribution (OOD), given their
    layer embeddings (possibly including the input itself) and the classes predicted for them.

    Larger scores correspond to a higher probability that the test sample is adversarial or OOD. The scores
    can be thresholded, with values above the threshold declared as adversarial or OOD; the threshold can be
    chosen such that the detector has a target false positive rate.

    :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal
                             to the number of layers. The numpy array at index `i` has shape `(n, d_i)`,
                             where `n` is the number of samples and `d_i` is the dimension of the embeddings
                             at layer `i`.
    :param labels_pred: numpy array of class predictions made by the DNN.
    :param return_corrected_predictions: Set to True in order to also get the corrected class predictions.
                                         Note that this changes the returned value into a tuple.
    :param start_layer: Starting index of the layers to include in the p-value fusion. Set to 0 to include
                        all the layers. Negative values such as -1, -2, -3 follow the python indexing
                        convention; for example, `-3` implies the last 3 layers are included. Only passed on
                        when `self.score_type = 'pvalue'`.
    :param test_layer_pairs: Set to True in order to estimate p-values for test statistics from all pairs
                             of layers and append them to the per-layer p-values. Only used when
                             `self.score_type = 'pvalue'`.
    :param is_train: Set to True if the inputs are the same non-adversarial inputs used with the `fit`
                     method.

    :return: (scores [, corrected_classes])
        - scores: numpy array of scores for detection or ranking. Larger values correspond to a higher
                  probability that the sample is adversarial or OOD. The OOD score is returned if
                  `self.ood_detection = True`, and the adversarial score otherwise.
        # returned only if `return_corrected_predictions = True`
        - corrected_classes: numpy array of the corrected class predictions.
    :raises ValueError: if the number of layers is not `self.n_layers`, or if `self.score_type` is not one
                        of 'density', 'pvalue', 'klpe'.
    """
    n_test = labels_pred.shape[0]
    n_layers_in = len(layer_embeddings)
    if n_layers_in != self.n_layers:
        raise ValueError("Expecting {:d} layers in the input data, but received {:d}".format(self.n_layers,
                                                                                             n_layers_in))

    # Bootstrap resampling for the per-layer p-values is skipped when those p-values are never used, which
    # is the case for the 'density' and 'klpe' score types without top-ranked layer selection
    skip_bootstrap = (self.score_type in ('density', 'klpe')) and (not self.use_top_ranked)
    bootstrap = not skip_bootstrap

    # Test statistics and p-values at each layer, conditioned on the predicted class and on each candidate
    # true class
    shape_stats = (n_test, self.n_layers)
    test_stats_pred = np.zeros(shape_stats)
    pvalues_pred = np.zeros(shape_stats)
    test_stats_true = dict()
    pvalues_true = dict()
    for c in self.labels_unique:
        test_stats_true[c] = np.zeros(shape_stats)
        pvalues_true[c] = np.zeros(shape_stats)

    for i in range(self.n_layers):
        data_proj = layer_embeddings[i]
        if self.transform_models:
            # Dimension reduction
            data_proj = transform_data_from_model(data_proj, self.transform_models[i])

        # Test statistics and negative log p-values for layer `i`. Both arrays have shape
        # `(n_test, self.n_classes + 1)`: column 0 is conditioned on the predicted class and column `j + 1`
        # on the `j`-th class being the candidate true class
        stats_layer, pvals_layer = self.test_stats_models[i].score(data_proj, labels_pred, is_train=is_train,
                                                                   bootstrap=bootstrap)
        test_stats_pred[:, i] = stats_layer[:, 0]
        pvalues_pred[:, i] = pvals_layer[:, 0]
        for j, c in enumerate(self.labels_unique):
            test_stats_true[c][:, i] = stats_layer[:, j + 1]
            pvalues_true[c][:, i] = pvals_layer[:, j + 1]

    if self.use_top_ranked:
        # Conditioned on the predicted class: keep the largest `self.num_top_ranked` negative log p-values
        # across the layers
        test_stats_pred, pvalues_pred = self._get_top_ranked(test_stats_pred, pvalues_pred, reverse=True)
        # Conditioned on the true class: keep the smallest `self.num_top_ranked` negative log p-values
        # across the layers
        for c in self.labels_unique:
            test_stats_true[c], pvalues_true[c] = self._get_top_ranked(test_stats_true[c], pvalues_true[c])

    # Adversarial or OOD scores for the test samples and the corrected class predictions
    score_type = self.score_type
    if score_type == 'density':
        scores_adver, scores_ood, corrected_classes = self._score_density_based(
            labels_pred, test_stats_pred, test_stats_true,
            return_corrected_predictions=return_corrected_predictions
        )
    elif score_type == 'pvalue':
        if test_layer_pairs:
            n_pairs = int(0.5 * self.n_layers * (self.n_layers - 1))
            pairs_pred = np.zeros((n_test, n_pairs))
            for c in self.labels_unique:
                # Samples predicted into class `c`
                rows_c = np.where(labels_pred == c)[0]
                pairs_pred[rows_c, :] = pvalue_score_all_pairs(
                    self.test_stats_pred_null[c], test_stats_pred[rows_c, :], log_transform=True,
                    bootstrap=bootstrap
                )
                pairs_true_c = pvalue_score_all_pairs(
                    self.test_stats_true_null[c], test_stats_true[c], log_transform=True, bootstrap=bootstrap
                )
                # Append columns corresponding to the p-values from the layer pairs
                pvalues_true[c] = np.hstack((pvalues_true[c], pairs_true_c))

            # Append columns corresponding to the p-values from the layer pairs
            pvalues_pred = np.hstack((pvalues_pred, pairs_pred))

        scores_adver, scores_ood, corrected_classes = self._score_pvalue_based(
            labels_pred, pvalues_pred, pvalues_true,
            return_corrected_predictions=return_corrected_predictions, start_layer=start_layer
        )
    elif score_type == 'klpe':
        scores_adver, scores_ood, corrected_classes = self._score_klpe(
            labels_pred, test_stats_pred, test_stats_true,
            return_corrected_predictions=return_corrected_predictions
        )
    else:
        raise ValueError("Invalid score type '{}'".format(self.score_type))

    scores = scores_ood if self.ood_detection else scores_adver
    if return_corrected_predictions:
        return scores, corrected_classes

    return scores
def fit(self, layer_embeddings, labels, labels_pred, **kwargs):
    """
    Estimate parameters of the detection method given natural (non-adversarial) input data.
    NOTE: Inputs to this method can be obtained by calling the function `extract_layer_embeddings`.

    For every layer, a test statistic model (selected by `self.layer_statistic`) is fit to the (optionally
    dimension-reduced) embeddings. The resulting test statistics are grouped per predicted class and per
    true class, and, depending on `self.score_type`, joint density models ('density') or localized p-value
    estimation models ('klpe') are fit to them.

    :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal
                             to the number of layers. The numpy array at index `i` has shape `(n, d_i)`,
                             where `n` is the number of samples and `d_i` is the dimension of the embeddings
                             at layer `i`.
    :param labels: numpy array of labels for the classification problem addressed by the DNN. Should have
                   shape `(n, )`, where `n` is the number of samples.
    :param labels_pred: numpy array of class predictions made by the DNN. Should have the same shape as
                        `labels`.
    :param kwargs: dict with additional keyword arguments that can be passed to the `fit` method of the test
                   statistic class. Only `combine_low_proba_classes` and `n_classes_multinom` are used, and
                   only when `self.layer_statistic = 'multinomial'`.

    :return: Instance of the class with all parameters fit to the data.
    :raises ValueError: if the inputs are inconsistent in the number of samples, or if
                        `self.layer_statistic` is not a supported value.
    """
    self.n_layers = len(layer_embeddings)
    self.labels_unique = np.unique(labels)
    self.n_classes = len(self.labels_unique)
    self.n_samples = labels.shape[0]
    logger.info("Number of classes: {:d}.".format(self.n_classes))
    logger.info("Number of layer embeddings: {:d}.".format(self.n_layers))
    logger.info("Number of samples: {:d}.".format(self.n_samples))
    logger.info("Test statistic calculated at each layer: {}.".format(self.layer_statistic))
    if labels_pred.shape[0] != self.n_samples:
        raise ValueError("Inputs 'labels' and 'labels_pred' do not have the same size.")

    if not all(arr.shape[0] == self.n_samples for arr in layer_embeddings):
        raise ValueError("Input 'layer_embeddings' does not have the expected format")

    if self.use_top_ranked:
        if self.num_top_ranked > self.n_layers:
            logger.warning("Number of top-ranked layer statistics cannot be larger than the number of layers. "
                           "Setting it equal to the number of layers ({:d}).".format(self.n_layers))
            self.num_top_ranked = self.n_layers

    # Start from an empty list of per-layer models. `score` reads `self.test_stats_models[i]` for layer `i`,
    # so models left over from an earlier call to `fit` must not stay at the front of the list
    self.test_stats_models = []

    self.log_class_priors = np.zeros(self.n_classes)
    indices_true = dict()
    indices_pred = dict()
    test_stats_true = dict()
    pvalues_true = dict()
    test_stats_pred = dict()
    pvalues_pred = dict()
    for j, c in enumerate(self.labels_unique):
        indices_true[c] = np.where(labels == c)[0]
        indices_pred[c] = np.where(labels_pred == c)[0]
        # Test statistics and negative log p-values across the layers for the samples labeled into class `c`
        test_stats_true[c] = np.zeros((indices_true[c].shape[0], self.n_layers))
        pvalues_true[c] = np.zeros((indices_true[c].shape[0], self.n_layers))

        # Test statistics and negative log p-values across the layers for the samples predicted into
        # class `c`
        test_stats_pred[c] = np.zeros((indices_pred[c].shape[0], self.n_layers))
        pvalues_pred[c] = np.zeros((indices_pred[c].shape[0], self.n_layers))

        # Class count, converted to the log prior below. The array is indexed by the position `j` of the
        # class in `self.labels_unique` (not by the label value itself), so that labels do not have to be
        # the integers `0, ..., n_classes - 1`
        self.log_class_priors[j] = indices_true[c].shape[0]

    self.log_class_priors = np.log(self.log_class_priors) - np.log(self.n_samples)

    # Constructor arguments shared by all the test statistic classes
    kwargs_stat = dict(
        neighborhood_constant=self.neighborhood_constant,
        n_neighbors=self.n_neighbors,
        metric=self.metric,
        metric_kwargs=self.metric_kwargs,
        approx_nearest_neighbors=self.approx_nearest_neighbors,
        n_jobs=self.n_jobs,
        low_memory=self.low_memory,
        seed_rng=self.seed_rng
    )
    for i in range(self.n_layers):
        if self.transform_models:
            logger.info("Transforming the embeddings from layer {:d}.".format(i + 1))
            data_proj = transform_data_from_model(layer_embeddings[i], self.transform_models[i])
            logger.info("Input dimension = {:d}, projected dimension = {:d}".
                        format(layer_embeddings[i].shape[1], data_proj.shape[1]))
        else:
            data_proj = layer_embeddings[i]

        logger.info("Parameter estimation and test statistics calculation for layer {:d}:".format(i + 1))
        # Bootstrap p-values are used only if `self.use_top_ranked = True` because in this case the test
        # statistics across the layers are ranked based on the p-values
        kwargs_fit = {'bootstrap': self.use_top_ranked}
        if self.layer_statistic == 'multinomial':
            ts_obj = MultinomialScore(shared_nearest_neighbors=False, **kwargs_stat)
            for key in ('combine_low_proba_classes', 'n_classes_multinom'):
                if key in kwargs:
                    kwargs_fit[key] = kwargs[key]
        elif self.layer_statistic == 'binomial':
            ts_obj = BinomialScore(shared_nearest_neighbors=False, **kwargs_stat)
        elif self.layer_statistic == 'lid':
            # use 'euclidean' metric for LID estimation
            ts_obj = LIDScore(**dict(kwargs_stat, metric='euclidean', metric_kwargs=None))
        elif self.layer_statistic == 'lle':
            ts_obj = LLEScore(**kwargs_stat)
        elif self.layer_statistic == 'distance':
            ts_obj = DistanceScore(**kwargs_stat)
        elif self.layer_statistic == 'trust':
            ts_obj = TrustScore(**kwargs_stat)
        else:
            # Fail with a clear message instead of an `AttributeError` on a `None` model
            raise ValueError("Invalid layer statistic '{}'".format(self.layer_statistic))

        test_stats_temp, pvalues_temp = ts_obj.fit(
            data_proj, labels, labels_pred, labels_unique=self.labels_unique, **kwargs_fit
        )
        # - `test_stats_temp` will be a numpy array of shape `(self.n_samples, self.n_classes + 1)` with a
        #   vector of test statistics for each sample. The first column `test_stats_temp[:, 0]` gives the
        #   scores conditioned on the predicted class. The remaining columns `test_stats_temp[:, j + 1]`
        #   for `j = 0, 1, . . .` give the scores conditioned on the class at position `j` of
        #   `self.labels_unique` being the candidate true class for the sample.
        # - `pvalues_temp` is also a numpy array of the same shape with the negative log transformed
        #   p-values corresponding to the test statistics.
        self.test_stats_models.append(ts_obj)
        for j, c in enumerate(self.labels_unique):
            # Test statistics and negative log p-values from layer `i`
            test_stats_pred[c][:, i] = test_stats_temp[indices_pred[c], 0]
            pvalues_pred[c][:, i] = pvalues_temp[indices_pred[c], 0]
            test_stats_true[c][:, i] = test_stats_temp[indices_true[c], j + 1]
            pvalues_true[c][:, i] = pvalues_temp[indices_true[c], j + 1]

    for c in self.labels_unique:
        if self.use_top_ranked:
            logger.info("Using the test statistics corresponding to the smallest (largest) {:d} p-values "
                        "conditioned on the predicted (true) class.".format(self.num_top_ranked))
            # For the test statistics conditioned on the predicted class, take the largest
            # `self.num_top_ranked` negative log-transformed p-values across the layers
            test_stats_pred[c], pvalues_pred[c] = self._get_top_ranked(
                test_stats_pred[c], pvalues_pred[c], reverse=True
            )
            # For the test statistics conditioned on the true class, take the smallest
            # `self.num_top_ranked` negative log-transformed p-values across the layers
            test_stats_true[c], pvalues_true[c] = self._get_top_ranked(test_stats_true[c], pvalues_true[c])

        if self.score_type == 'density':
            logger.info("Learning a joint probability density model for the test statistics conditioned on the "
                        "predicted class '{}':".format(c))
            logger.info("Number of samples = {:d}, dimension = {:d}".format(*test_stats_pred[c].shape))
            self.density_models_pred[c] = train_log_normal_mixture(test_stats_pred[c], seed_rng=self.seed_rng)

            # Negative log density of the data used to fit the model
            arr1 = -1. * score_log_normal_mixture(test_stats_pred[c], self.density_models_pred[c],
                                                  log_transform=True)
            # Generate a large number of random samples from the model
            test_stats_rand_sample, _ = self.density_models_pred[c].sample(n_samples=NUM_RANDOM_SAMPLES)
            # Negative log density of the generated random samples. Log transformation is not needed since
            # the samples are generated from the model
            arr2 = -1. * score_log_normal_mixture(test_stats_rand_sample, self.density_models_pred[c],
                                                  log_transform=False)
            self.samples_neg_log_dens_pred[c] = np.concatenate([arr1, arr2])
            logger.info("Number of log-density sample values used for estimating p-values: {:d}".
                        format(self.samples_neg_log_dens_pred[c].shape[0]))

            logger.info("Learning a joint probability density model for the test statistics conditioned on the "
                        "true class '{}':".format(c))
            logger.info("Number of samples = {:d}, dimension = {:d}".format(*test_stats_true[c].shape))
            self.density_models_true[c] = train_log_normal_mixture(test_stats_true[c], seed_rng=self.seed_rng)

            # Negative log density of the data used to fit the model
            arr1 = -1. * score_log_normal_mixture(test_stats_true[c], self.density_models_true[c],
                                                  log_transform=True)
            # Generate a large number of random samples from the model
            test_stats_rand_sample, _ = self.density_models_true[c].sample(n_samples=NUM_RANDOM_SAMPLES)
            # Negative log density of the generated random samples
            arr2 = -1. * score_log_normal_mixture(test_stats_rand_sample, self.density_models_true[c],
                                                  log_transform=False)
            self.samples_neg_log_dens_true[c] = np.concatenate([arr1, arr2])
            logger.info("Number of log-density sample values used for estimating p-values: {:d}".
                        format(self.samples_neg_log_dens_true[c].shape[0]))

        if self.score_type == 'klpe':
            # Not setting the number of neighbors here. This will be automatically set based on the number
            # of samples per class
            kwargs_lpe = {
                'neighborhood_constant': self.neighborhood_constant,
                'metric': self.metric,
                'metric_kwargs': self.metric_kwargs,
                'approx_nearest_neighbors': self.approx_nearest_neighbors,
                'n_jobs': self.n_jobs,
                'seed_rng': self.seed_rng
            }
            logger.info("Fitting the localized p-value estimation model for the test statistics conditioned on "
                        "the predicted class {}:".format(c))
            self.klpe_models_pred[c] = averaged_KLPE_anomaly_detection(**kwargs_lpe)
            self.klpe_models_pred[c].fit(test_stats_pred[c])

            logger.info("Fitting the localized p-value estimation model for the test statistics conditioned on "
                        "the true class {}:".format(c))
            self.klpe_models_true[c] = averaged_KLPE_anomaly_detection(**kwargs_lpe)
            self.klpe_models_true[c].fit(test_stats_true[c])

    # Test statistics of the natural data, kept as the null distribution samples used by `score`
    self.test_stats_pred_null = test_stats_pred
    self.test_stats_true_null = test_stats_true
    return self
def score(self, layer_embeddings, is_train=False):
    """
    Calculate the deep kNN anomaly score and the corrected class prediction for each input sample.

    For each layer, the class label counts among the `self.n_neighbors` nearest neighbors of every sample are
    obtained from the KNN index of that layer. The non-conformity of a sample with respect to class `c` is the
    sum (over all the layers) of the neighborhood counts of all classes except `c`. The p-value of a sample for
    class `c` is the fraction of calibration non-conformity values (`self.nonconformity_calib`) that are greater
    than or equal to the sample's non-conformity for class `c`.

    :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal
                             to the number of layers. The numpy array at index `i` has shape `(n, d_i)`, where
                             `n` is the number of samples and `d_i` is the dimension of the embeddings at
                             layer `i`.
    :param is_train: Set to True if the inputs are the same non-adversarial inputs used with the `fit` method.
                     In this case the neighbors are found with `query_self` on the existing KNN index and the
                     contents of `layer_embeddings` are used only to determine the number of samples.

    :return: (scores, predictions)
        - scores: numpy array of scores corresponding to OOD or adversarial detection. It is the negative
                  log of the credibility scores. So high values of this score correspond to low credibility
                  (i.e. high probability of an outlier).
        - predictions: numpy array of the corrected deep kNN class predictions. Has the same shape as `scores`.

    :raises ValueError: if the number of layers in `layer_embeddings` is different from `self.n_layers`.
    """
    # Validate the number of layers before indexing into the list
    n_layers_in = len(layer_embeddings)
    if n_layers_in != self.n_layers:
        raise ValueError("Expecting {:d} layers in the input data, but received {:d}".
                         format(self.n_layers, n_layers_in))

    n_test = layer_embeddings[0].shape[0]
    nonconformity_per_class = np.zeros((n_test, self.n_classes))
    for i in range(self.n_layers):
        # Indices of the nearest neighbors of each test sample
        if is_train:
            # The index already holds these samples; no projection of the inputs is required
            nn_indices, _ = self.index_knn[i].query_self(k=self.n_neighbors)
        else:
            if self.transform_models:
                # Dimension reduction
                data_proj = transform_data_from_model(layer_embeddings[i], self.transform_models[i])
            else:
                data_proj = layer_embeddings[i]

            nn_indices, _ = self.index_knn[i].query(data_proj, k=self.n_neighbors)

        # Class label counts among the nearest neighbors
        _, nc_counts = neighbors_label_counts(nn_indices, self.labels_train_enc, self.n_classes)
        for j in range(self.n_classes):
            # Nonconformity w.r.t class `j` from layer `i`: neighborhood counts of all classes except `j`
            nonconformity_per_class[:, j] += np.sum(nc_counts[:, self.mask_exclude[j, :]], axis=1)

    # Calculate the p-values per-class with respect to the non-conformity scores of the calibration set.
    # With the calibration values sorted, `searchsorted(..., side='left')` gives the number of calibration
    # values strictly smaller than each test value. Subtracting from the total gives the number of values
    # `>=` the test value, without materializing an `(n_calib, n_test * n_classes)` boolean mask.
    calib_sorted = np.sort(self.nonconformity_calib)
    n_calib = calib_sorted.shape[0]
    num_smaller = np.searchsorted(calib_sorted, nonconformity_per_class, side='left')
    p_values = (n_calib - num_smaller) / float(n_calib)

    # Credibility is the maximum p-value over all classes
    credibility = np.max(p_values, axis=1)
    # Anomaly score. Clipping avoids taking the log of 0
    scores = -np.log(np.clip(credibility, sys.float_info.min, None))

    # Deep k-NN prediction is the class corresponding to the largest p-value
    predictions = self.labels_unique[np.argmax(p_values, axis=1)]

    return scores, predictions
def fit(self, layer_embeddings, labels):
    """
    Estimate parameters of the detection method given natural (non-adversarial) input data. Note that this
    data should be different from that used to train the DNN classifier.

    A KNN index is built per layer (after optional dimension reduction), and the calibration non-conformity
    value of each sample is accumulated over the layers as the neighborhood count of all classes other than
    the sample's own label.

    NOTE: Inputs to this method can be obtained by calling the function `extract_layer_embeddings`.

    :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal
                             to the number of layers. The numpy array at index `i` has shape `(n, d_i)`, where
                             `n` is the number of samples and `d_i` is the dimension of the embeddings at
                             layer `i`.
    :param labels: numpy array of labels for the classification problem addressed by the DNN. Should have
                   shape `(n, )`, where `n` is the number of samples.

    :return: Instance of the class with all parameters fit to the data.

    :raises ValueError: if any array in `layer_embeddings` does not have `n` rows.
    """
    self.n_layers = len(layer_embeddings)
    self.labels_unique = np.unique(labels)
    self.n_classes = len(self.labels_unique)
    self.n_samples = labels.shape[0]

    # Mapping from the original labels to the set {0, 1, . . .,self.n_classes - 1}. This is needed by the
    # label count function
    label_to_index = dict(zip(self.labels_unique, np.arange(self.n_classes)))
    self.label_encoder = np.vectorize(label_to_index.__getitem__)

    # Number of nearest neighbors
    if self.n_neighbors is None:
        # Set number of nearest neighbors based on the data size and the neighborhood constant
        self.n_neighbors = int(np.ceil(self.n_samples ** self.neighborhood_constant))

    logger.info("Number of classes: {:d}.".format(self.n_classes))
    logger.info("Number of layer embeddings: {:d}.".format(self.n_layers))
    logger.info("Number of samples: {:d}.".format(self.n_samples))
    logger.info("Number of neighbors: {:d}.".format(self.n_neighbors))

    # Every layer should provide an embedding for each of the labeled samples
    if not all(emb.shape[0] == self.n_samples for emb in layer_embeddings):
        raise ValueError("Input 'layer_embeddings' does not have the expected format")

    self.labels_train_enc = self.label_encoder(labels)

    # Index of labeled samples from each class
    indices_true = {c: np.where(labels == c)[0] for c in self.labels_unique}

    # Row `j` of this mask selects all the classes except the class at position `j`. The builtin `bool` is
    # used as the dtype because the `np.bool` alias is not available in recent NumPy releases.
    self.mask_exclude = np.logical_not(np.eye(self.n_classes, dtype=bool))

    self.nonconformity_calib = np.zeros(self.n_samples)
    self.index_knn = [None for _ in range(self.n_layers)]
    for i in range(self.n_layers):
        logger.info("Processing layer {:d}:".format(i + 1))
        if self.transform_models:
            logger.info("Transforming the embeddings from layer {:d}.".format(i + 1))
            data_proj = transform_data_from_model(layer_embeddings[i], self.transform_models[i])
            logger.info("Input dimension = {:d}, projected dimension = {:d}".
                        format(layer_embeddings[i].shape[1], data_proj.shape[1]))
        else:
            data_proj = layer_embeddings[i]

        logger.info("Building a KNN index for nearest neighbor queries.")
        # Build a KNN index on the set of feature embeddings from normal samples from layer `i`
        self.index_knn[i] = KNNIndex(
            data_proj, n_neighbors=self.n_neighbors,
            metric=self.metric, metric_kwargs=self.metric_kwargs,
            approx_nearest_neighbors=self.approx_nearest_neighbors,
            n_jobs=self.n_jobs,
            low_memory=self.low_memory,
            seed_rng=self.seed_rng
        )
        # Indices of the nearest neighbors of each sample
        nn_indices, _ = self.index_knn[i].query_self(k=self.n_neighbors)

        logger.info("Calculating the class label counts and non-conformity scores in the neighborhood of "
                    "each sample.")
        _, nc_counts = neighbors_label_counts(nn_indices, self.labels_train_enc, self.n_classes)
        for j, c in enumerate(self.labels_unique):
            rows_c = indices_true[c]
            # Neighborhood counts of all classes except `c`, restricted to the labeled samples from class `c`
            counts_other = nc_counts[rows_c, :][:, self.mask_exclude[j, :]]
            # Nonconformity from layer `i` for all labeled samples from class `c`
            self.nonconformity_calib[rows_c] += np.sum(counts_other, axis=1)

    return self
def fit(self, data, labels, labels_pred):
    """
    Estimate the `1 - alpha` density level sets for each class using the given data, with true labels and
    classifier-predicted labels. This will be used to calculate the trust score.

    For each class, a KNN index is built on the samples of that class. When `self.alpha > 0`, samples whose
    distance to their k-th nearest neighbor exceeds the `100 * (1 - alpha)` percentile are excluded and the
    index is rebuilt on the remaining samples. The trust scores of the estimation data are stored in
    `self.scores_estim`.

    :param data: numpy array with the feature vectors of shape `(n, d)`, where `n` and `d` are the number of
                 samples and the data dimension respectively.
    :param labels: numpy array of labels for the classification problem addressed by the DNN. Should have
                   shape `(n, )`, where `n` is the number of samples.
    :param labels_pred: numpy array similar to `labels`, but with the classes predicted by the classifier.
                        It is passed to `self._score_helper` along with the level-set distances.

    :return: Instance of the class with all parameters fit to the data.
    """
    self.n_samples, dim = data.shape
    self.labels_unique, class_counts = np.unique(labels, return_counts=True)
    self.n_classes = len(self.labels_unique)
    if self.n_neighbors is None:
        # Set number of nearest neighbors based on the maximum number of samples per class and the
        # neighborhood constant
        num = int(class_counts.max()) if class_counts.size else 0
        self.n_neighbors = int(np.ceil(num ** self.neighborhood_constant))

    logger.info("Number of samples: {:d}. Data dimension = {:d}.".format(self.n_samples, dim))
    logger.info("Number of classes: {:d}.".format(self.n_classes))
    logger.info("Number of neighbors (k): {:d}.".format(self.n_neighbors))
    logger.info("Fraction of outliers (alpha): {:.4f}.".format(self.alpha))
    if self.model_dim_reduction:
        data = transform_data_from_model(data, self.model_dim_reduction)
        dim = data.shape[1]
        logger.info("Applying dimension reduction to the data. Projected dimension = {:d}.".format(dim))

    # Keyword arguments shared by every KNN index built below
    kwargs_knn = {
        'n_neighbors': self.n_neighbors,
        'metric': self.metric,
        'metric_kwargs': self.metric_kwargs,
        'approx_nearest_neighbors': self.approx_nearest_neighbors,
        'n_jobs': self.n_jobs,
        'low_memory': self.low_memory,
        'seed_rng': self.seed_rng
    }
    # Distance from each sample in `data` to the `1 - alpha` level sets corresponding to each class
    distance_level_sets = np.zeros((self.n_samples, self.n_classes))
    self.index_knn = dict()
    self.epsilon = dict()
    indices_sub = dict()
    for j, c in enumerate(self.labels_unique):
        logger.info("Processing data from class '{}':".format(c))
        logger.info("Building a KNN index for all the samples from class '{}'.".format(c))
        indices_sub[c] = np.where(labels == c)[0]
        data_sub = data[indices_sub[c], :]
        self.index_knn[c] = KNNIndex(data_sub, **kwargs_knn)

        # Distances to the k nearest neighbors of each sample
        _, nn_distances = self.index_knn[c].query_self(k=self.n_neighbors)
        # Radius or distance to the k-th nearest neighbor for each sample
        radius_arr = nn_distances[:, self.n_neighbors - 1]

        # Smallest radius `epsilon` such that only `alpha` fraction of the samples from class `c` have radius
        # greater than `epsilon`
        if self.alpha > 0.:
            q = 100 * (1 - self.alpha)
            try:
                # `method` is the current name of this option of `np.percentile`
                self.epsilon[c] = np.percentile(radius_arr, q, method='midpoint')
            except TypeError:
                # Older NumPy versions only accept the keyword `interpolation`
                self.epsilon[c] = np.percentile(radius_arr, q, interpolation='midpoint')

            # Samples with radius larger than `epsilon` are treated as outliers
            mask_incl = radius_arr <= self.epsilon[c]
        else:
            # Slightly larger value than the largest radius
            self.epsilon[c] = 1.0001 * np.max(radius_arr)
            # All samples are included in the density level set. The builtin `bool` is used as the dtype
            # because the `np.bool` alias is not available in recent NumPy releases.
            mask_incl = np.ones(indices_sub[c].shape[0], dtype=bool)

        mask_excl = np.logical_not(mask_incl)
        num_excl = np.count_nonzero(mask_excl)
        if num_excl:
            # Exclude the outliers and build a KNN index with the remaining samples
            logger.info("Excluding {:d} samples with radius larger than {:.6f} and building a KNN index with "
                        "the remaining samples.".format(num_excl, self.epsilon[c]))
            self.index_knn[c] = KNNIndex(data_sub[mask_incl, :], **kwargs_knn)

            # Distance to the nearest neighbor of each sample that is part of the KNN index
            _, dist_temp = self.index_knn[c].query_self(k=1)
            ind = indices_sub[c][mask_incl]
            distance_level_sets[ind, j] = dist_temp[:, 0]

            # Distance to the nearest neighbor of each sample that is not a part of the KNN index (outliers)
            _, dist_temp = self.index_knn[c].query(data_sub[mask_excl, :], k=1)
            ind = indices_sub[c][mask_excl]
            distance_level_sets[ind, j] = dist_temp[:, 0]
        else:
            # No need to rebuild the KNN index because no samples are excluded.
            # Distance to the nearest neighbor of each sample
            distance_level_sets[indices_sub[c], j] = nn_distances[:, 0]

    logger.info("Calculating the trust score for the estimation data.")
    for c in self.labels_unique:
        # Compute the distance from each sample from class `c` to the level sets from the remaining classes
        data_sub = data[indices_sub[c], :]
        for j, c_hat in enumerate(self.labels_unique):
            if c_hat == c:
                continue

            _, dist_temp = self.index_knn[c_hat].query(data_sub, k=1)
            distance_level_sets[indices_sub[c], j] = dist_temp[:, 0]

    self.scores_estim = self._score_helper(distance_level_sets, labels_pred)
    return self