コード例 #1
0
    def score_trust(self, data_test, labels_pred, is_train=False):
        """
        Compute a trust score for every test sample with respect to its classifier-predicted label.

        For each class in `self.labels_unique`, the distance from every test sample to its single nearest
        neighbor in that class's KNN index (`self.index_knn[c]`) is collected, and the resulting distance matrix
        is handed to `self._score_helper` together with the predicted labels.

        :param data_test: numpy array with the test data of shape `(n, d)`, where `n` and `d` are the number of
                          samples and the dimension respectively.
        :param labels_pred: numpy array of the classifier-predicted labels for the samples in `data_test`.
                            Should have shape `(n, )`.
        :param is_train: Set to True if this is the same data that was passed to the `fit` method. In that case
                         the stored `self.scores_estim` is returned directly and the inputs are not used.

        :return: the stored `self.scores_estim` if `is_train` is True, otherwise whatever `self._score_helper`
                 returns for the class-wise distance matrix (presumably one trust score per test sample).
        """
        if is_train:
            # Scores for the training data are read from the instance; nothing is recomputed
            return self.scores_estim

        if self.model_dim_reduction:
            # Project the test data with the configured dimension reduction model
            data_test = transform_data_from_model(data_test, self.model_dim_reduction)

        n_samples = data_test.shape[0]
        # Column `col` holds the nearest-neighbor distance to the class at position `col` of `labels_unique`
        dist_to_classes = np.zeros((n_samples, self.n_classes))
        for col, label in enumerate(self.labels_unique):
            _neighbors, nearest = self.index_knn[label].query(data_test, k=1)
            dist_to_classes[:, col] = nearest[:, 0]

        return self._score_helper(dist_to_classes, labels_pred)
コード例 #2
0
def transform_layer_embeddings(embeddings_in, transform_models):
    """
    Apply the per-layer dimension reduction model to the embeddings of every layer.

    Layer `i` of `embeddings_in` is transformed by calling `transform_data_from_model` with
    `transform_models[i]`. The input and projected dimensions are logged for each layer.

    NOTE: The original documentation states that an element of `transform_models` can be set to `None` to skip
    dimension reduction at that layer; that handling lives in `transform_data_from_model` and is not visible here.

    :param embeddings_in: list with one entry per layer. Each entry is read via `.shape[1]`, so it is expected to
                          be a 2D numpy array (samples along the first axis).
    :param transform_models: list with the transformation model for each layer. Must have the same length as
                             `embeddings_in`.
    :return: list of transformed data arrays, one per layer.
    """
    n_layers = len(embeddings_in)
    mismatch_msg = ("Length of 'transform_models' is not equal to the length of "
                    "'embeddings_in'")
    assert len(transform_models) == n_layers, mismatch_msg

    embeddings_out = []
    for idx, layer_data in enumerate(embeddings_in):
        logger.info("Transforming the embeddings from layer {:d}:".format(idx + 1))
        projected = transform_data_from_model(layer_data, transform_models[idx])
        embeddings_out.append(projected)
        dim_in = layer_data.shape[1]
        dim_out = projected.shape[1]
        logger.info("Input dimension = {:d}, projected dimension = {:d}".format(dim_in, dim_out))

    return embeddings_out
コード例 #3
0
    def score(self, layer_embeddings, labels_pred, cleanup=True):
        """
        Given a list of layer embeddings for test samples, extract the layer-wise LID feature vector and return the
        decision function of the logistic classifier.

        Samples whose predicted label is not one of `self.labels_unique` are never assigned an LID estimate, so
        their (unscaled) feature values stay at 0 for every layer.

        :param layer_embeddings: list of numpy arrays with the layer embeddings for the test samples. Length of the
                                 list must equal `self.n_layers`. The numpy array at index `i` has shape
                                 `(n, d_i)`, where `n` is the number of samples and `d_i` is the dimension of the
                                 embeddings at layer `i`.
        :param labels_pred: numpy array with the predicted class labels for the samples in `layer_embeddings`.
        :param cleanup: If set to True, the temporary directory where the KNN index files are saved will be deleted
                        after scoring (only relevant when `self.save_knn_indices_to_file` is set). If this method
                        is to be called multiple times, set `cleanup = False` for all calls except the last one.
        :return:
            - numpy array of detection scores for the test samples. Has shape `(n, )` where `n` is the number of
              samples. Larger values correspond to a higher confidence that the sample is adversarial.
        :raises ValueError: if the number of entries in `layer_embeddings` differs from `self.n_layers`.
        """
        # Imported locally so this method does not depend on a module-level import of `shutil`
        import shutil

        # Validate the layer count BEFORE indexing `layer_embeddings[0]`, so that an empty or short input is
        # reported with the intended ValueError instead of an IndexError
        n_layers_in = len(layer_embeddings)
        if n_layers_in != self.n_layers:
            raise ValueError("Expecting {:d} layers in the input 'layer_embeddings', but received {:d} layers.".
                             format(self.n_layers, n_layers_in))

        n_test = layer_embeddings[0].shape[0]
        features_lid = np.zeros((n_test, self.n_layers))
        for i in range(self.n_layers):
            logger.info("Calculating LID features for layer {:d}".format(i + 1))
            if self.transform_models:
                # Dimension reduction
                data_proj = transform_data_from_model(layer_embeddings[i], self.transform_models[i])
            else:
                data_proj = layer_embeddings[i]

            if self.save_knn_indices_to_file:
                # Load the per-class KNN indices of this layer from file.
                # NOTE(review): `pickle.load` must only be used on trusted files; these paths come from
                # `self.temp_knn_files` (presumably written by `fit`) -- confirm they cannot be tampered with.
                with open(self.temp_knn_files[i], 'rb') as fp:
                    self.index_knn[i] = pickle.load(fp)

            for c in self.labels_unique:
                # Test samples predicted into class `c` are queried against the KNN index of class `c`
                ind = np.where(labels_pred == c)[0]
                if ind.shape[0]:
                    _, nn_distances = self.index_knn[i][c].query(data_proj[ind, :], k=self.n_neighbors_per_class[c])
                    features_lid[ind, i] = lid_mle_amsaleg(nn_distances)

            if self.save_knn_indices_to_file:
                # Free up the allocated memory
                self.index_knn[i] = None

        if cleanup and self.save_knn_indices_to_file:
            # Portable replacement for `rm -rf`: an already-missing directory is not an error, any other
            # failure (e.g. permissions) still propagates
            try:
                shutil.rmtree(self.temp_direc)
            except FileNotFoundError:
                pass

        # Apply the scaler fitted on the training LID features before evaluating the logistic model
        features_lid = self.scaler.transform(features_lid)
        return self.model_logistic.decision_function(features_lid)
コード例 #4
0
    def fit(self, layer_embeddings_normal, labels_normal, labels_pred_normal,
            layer_embeddings_adversarial, labels_pred_adversarial,
            layer_embeddings_noisy=None, labels_pred_noisy=None):
        """
        Extract the LID feature vector for normal, noisy, and adversarial samples and train a logistic classifier
        to separate adversarial samples from (normal + noisy). Cross-validation is used to select the hyper-parameter
        `C` using area under the ROC curve as the validation metric.

        For every layer and every class, a KNN index is built on the normal samples labeled with that class. The
        LID estimate of a sample at a layer is computed from its nearest-neighbor distances to the index of its
        predicted class. The per-layer LID estimates form the feature vector, which is min-max scaled before
        training the classifier.

        NOTE:
        True labels and predicted labels are required for the normal feature embeddings.
        Only predicted labels are needed for the noisy and adversarial feature embeddings.

        NOTE:
        The attributes `self.indices_true`, `self.indices_pred_normal`, `self.indices_pred_adver`,
        `self.indices_pred_noisy` and `self.n_neighbors_per_class` are assigned by key here, so they must already
        exist as dict-like containers (presumably created in the constructor).

        :param layer_embeddings_normal: list of numpy arrays with the layer embeddings for normal samples.
                                        Length of the list is equal to the number of layers. The numpy array at
                                        index `i` has shape `(n, d_i)`, where `n` is the number of samples and `d_i`
                                        is the dimension of the embeddings at layer `i`.
        :param labels_normal: numpy array of class labels for the normal samples. Should have shape `(n, )`, where
                              `n` is the number of normal samples.
        :param labels_pred_normal: numpy array of DNN classifier predictions for the normal samples. Should have the
                                   same shape as `labels_normal`.
        :param layer_embeddings_adversarial: Same format as `layer_embeddings_normal`, but corresponding to
                                             the adversarial samples.
        :param labels_pred_adversarial: numpy array of DNN classifier predictions for the adversarial samples. Should
                                        have shape `(n, )`, where `n` is the number of adversarial samples.
        :param layer_embeddings_noisy: Same format as `layer_embeddings_normal`, but corresponding to the noisy
                                       samples. Can be set to `None` to exclude noisy data from training.
        :param labels_pred_noisy: numpy array of DNN classifier predictions for the noisy samples. Should have shape
                                  `(n, )`, where `n` is the number of noisy samples. Required whenever
                                  `layer_embeddings_noisy` is provided.
        :return:
            (self, scores_normal, scores_adversarial) if `layer_embeddings_noisy` is None
            (self, scores_normal, scores_adversarial, scores_noisy) otherwise.
            -------------------------------------------------------
            - self: trained instance of the class.
            - scores_normal: numpy array with the scores (decision function of the logistic classifier) for normal
                             samples. 1d array with the same number of samples as `layer_embeddings_normal`.
            - scores_adversarial: scores corresponding to `layer_embeddings_adversarial`.
            - scores_noisy: scores corresponding to `layer_embeddings_noisy` if noisy training data is provided.
        :raises ValueError: if noisy embeddings are given without `labels_pred_noisy`, if the number of layers is
                            inconsistent across the inputs, if `labels_normal` and `labels_pred_normal` differ in
                            shape, or if the arrays of one input do not all have the same number of samples.
        """
        self.n_layers = len(layer_embeddings_normal)
        logger.info("Number of layer embeddings: {:d}.".format(self.n_layers))
        if layer_embeddings_noisy is None:
            logger.info("Noisy training data not provided.")
            cond1 = False
            noisy_data = False
        else:
            cond1 = (len(layer_embeddings_noisy) != self.n_layers)
            noisy_data = True
            if labels_pred_noisy is None:
                raise ValueError("Class predictions are not provided for the noisy data")

        if cond1 or (len(layer_embeddings_adversarial) != self.n_layers):
            raise ValueError("The layer embeddings for noisy and attack samples must have the same length as that "
                             "of normal samples")

        if labels_normal.shape != labels_pred_normal.shape:
            raise ValueError("Length of arrays 'labels_normal' and 'labels_pred_normal' is not equal")

        # Number of samples in each of the categories: [normal, noisy, adversarial]
        self.n_samples = [
            layer_embeddings_normal[0].shape[0],
            layer_embeddings_noisy[0].shape[0] if noisy_data else 0,
            layer_embeddings_adversarial[0].shape[0]
        ]
        # Distinct class labels
        self.labels_unique = np.unique(labels_normal)
        for c in self.labels_unique:
            # Normal labeled samples from class `c`
            self.indices_true[c] = np.where(labels_normal == c)[0]
            # Normal samples predicted into class `c`
            self.indices_pred_normal[c] = np.where(labels_pred_normal == c)[0]
            # Adversarial samples predicted into class `c`
            self.indices_pred_adver[c] = np.where(labels_pred_adversarial == c)[0]
            if noisy_data:
                # Noisy samples predicted into class `c`
                self.indices_pred_noisy[c] = np.where(labels_pred_noisy == c)[0]

            # Number of nearest neighbors per class
            if self.n_neighbors is None:
                # Set based on the number of samples from this class and the neighborhood constant
                self.n_neighbors_per_class[c] = \
                    int(np.ceil(self.indices_true[c].shape[0] ** self.neighborhood_constant))
            else:
                # Use the value specified as input
                self.n_neighbors_per_class[c] = self.n_neighbors

        # The data arrays at all layers should have the same number of samples
        if not all([layer_embeddings_normal[i].shape[0] == self.n_samples[0] for i in range(self.n_layers)]):
            raise ValueError("Input 'layer_embeddings_normal' does not have the expected format")

        if noisy_data:
            if not all([layer_embeddings_noisy[i].shape[0] == self.n_samples[1] for i in range(self.n_layers)]):
                raise ValueError("Input 'layer_embeddings_noisy' does not have the expected format")

        if not all([layer_embeddings_adversarial[i].shape[0] == self.n_samples[2] for i in range(self.n_layers)]):
            raise ValueError("Input 'layer_embeddings_adversarial' does not have the expected format")

        if self.save_knn_indices_to_file:
            # Create a temporary directory (under the current working directory) for saving the KNN indices
            self.temp_direc = tempfile.mkdtemp(dir=os.getcwd())
            self.temp_knn_files = [''] * self.n_layers

        # KNN indices for the layer embeddings from each layer and each class
        self.index_knn = [dict() for _ in range(self.n_layers)]
        # One row per sample, one LID feature per layer. The noisy array has zero rows when no noisy data is given
        features_lid_normal = np.zeros((self.n_samples[0], self.n_layers))
        features_lid_noisy = np.zeros((self.n_samples[1], self.n_layers))
        features_lid_adversarial = np.zeros((self.n_samples[2], self.n_layers))
        for i in range(self.n_layers):
            logger.info("Processing layer {:d}:".format(i + 1))
            # Dimensionality reduction of the layer embeddings, if required
            if self.transform_models:
                data_normal = transform_data_from_model(layer_embeddings_normal[i], self.transform_models[i])
                data_adver = transform_data_from_model(layer_embeddings_adversarial[i], self.transform_models[i])
                if noisy_data:
                    data_noisy = transform_data_from_model(layer_embeddings_noisy[i], self.transform_models[i])
                else:
                    data_noisy = None

                d1 = layer_embeddings_normal[i].shape[1]
                d2 = data_normal.shape[1]
                if d2 < d1:
                    logger.info("Input dimension = {:d}, projected dimension = {:d}".format(d1, d2))
            else:
                data_normal = layer_embeddings_normal[i]
                data_adver = layer_embeddings_adversarial[i]
                if noisy_data:
                    data_noisy = layer_embeddings_noisy[i]
                else:
                    data_noisy = None

            for c in self.labels_unique:
                logger.info("Building a KNN index on the feature embeddings of normal samples from class {}".
                            format(c))
                self.index_knn[i][c] = KNNIndex(
                    data_normal[self.indices_true[c], :], n_neighbors=self.n_neighbors_per_class[c],
                    metric=self.metric, metric_kwargs=self.metric_kwargs,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
                logger.info("Calculating LID estimates for the normal, noisy, and adversarial layer embeddings "
                            "predicted into class {}".format(c))
                # Distance to nearest neighbors of all labeled samples from class `c`
                _, nn_distances_temp = self.index_knn[i][c].query_self(k=self.n_neighbors_per_class[c])

                n_pred_normal = self.indices_pred_normal[c].shape[0]
                n_pred_adver = self.indices_pred_adver[c].shape[0]
                if noisy_data:
                    n_pred_noisy = self.indices_pred_noisy[c].shape[0]
                else:
                    n_pred_noisy = 0

                if n_pred_normal:
                    # Distance to nearest neighbors of samples predicted into class `c` that are also labeled as
                    # class `c`. These samples will be a part of the KNN index
                    nn_distances = helper_knn_distance(self.indices_pred_normal[c], self.indices_true[c],
                                                       nn_distances_temp)
                    # Rows with a negative first distance are treated as "not filled in by the helper"
                    mask = (nn_distances[:, 0] < 0.)
                    if np.any(mask):
                        # Distance to nearest neighbors of samples predicted into class `c` that are not labeled as
                        # class `c`. These samples will not be a part of the KNN index
                        ind_comp = self.indices_pred_normal[c][mask]
                        _, temp_arr = self.index_knn[i][c].query(data_normal[ind_comp, :],
                                                                 k=self.n_neighbors_per_class[c])
                        nn_distances[mask, :] = temp_arr

                    # LID estimates for the normal feature embeddings predicted into class `c`
                    features_lid_normal[self.indices_pred_normal[c], i] = lid_mle_amsaleg(nn_distances)

                # LID estimates for the noisy feature embeddings predicted into class `c`
                if n_pred_noisy:
                    temp_arr = data_noisy[self.indices_pred_noisy[c], :]
                    _, nn_distances = self.index_knn[i][c].query(temp_arr, k=self.n_neighbors_per_class[c])
                    features_lid_noisy[self.indices_pred_noisy[c], i] = lid_mle_amsaleg(nn_distances)

                # LID estimates for the adversarial feature embeddings predicted into class `c`
                if n_pred_adver:
                    temp_arr = data_adver[self.indices_pred_adver[c], :]
                    _, nn_distances = self.index_knn[i][c].query(temp_arr, k=self.n_neighbors_per_class[c])
                    features_lid_adversarial[self.indices_pred_adver[c], i] = lid_mle_amsaleg(nn_distances)

            if self.save_knn_indices_to_file:
                logger.info("Saving the KNN indices per class from layer {:d} to a pickle file".format(i + 1))
                self.temp_knn_files[i] = os.path.join(self.temp_direc, 'knn_indices_layer_{:d}.pkl'.format(i + 1))
                with open(self.temp_knn_files[i], 'wb') as fp:
                    pickle.dump(self.index_knn[i], fp)

                # Free up the allocated memory
                self.index_knn[i] = None

        # LID feature vectors and labels for the binary logistic classifier.
        # Normal and noisy samples are given label 0 and adversarial samples are given label 1.
        # The builtin `int` is used as dtype because the `np.int` alias was removed in NumPy 1.24.
        n_pos = features_lid_adversarial.shape[0]
        if noisy_data:
            features_lid = np.concatenate([features_lid_normal, features_lid_noisy, features_lid_adversarial],
                                          axis=0)
            labels_bin = np.concatenate([np.zeros(features_lid_normal.shape[0], dtype=int),
                                         np.zeros(features_lid_noisy.shape[0], dtype=int),
                                         np.ones(n_pos, dtype=int)])
        else:
            features_lid = np.concatenate([features_lid_normal, features_lid_adversarial], axis=0)
            labels_bin = np.concatenate([np.zeros(features_lid_normal.shape[0], dtype=int),
                                         np.ones(n_pos, dtype=int)])

        pos_prop = n_pos / float(labels_bin.shape[0])
        # Randomly shuffle the samples to avoid determinism (uses numpy's global random state)
        ind_perm = np.random.permutation(labels_bin.shape[0])
        features_lid = features_lid[ind_perm, :]
        labels_bin = labels_bin[ind_perm]
        # Min-max scaling for the LID features
        self.scaler = MinMaxScaler().fit(features_lid)
        features_lid = self.scaler.transform(features_lid)
        logger.info("Training a binary logistic classifier with {:d} samples and {:d} LID features.".
                    format(*features_lid.shape))
        logger.info("Using {:d}-fold cross-validation with area under ROC curve as the metric to select "
                    "the best regularization hyperparameter.".format(self.n_cv_folds))
        logger.info("Proportion of positive (adversarial or OOD) samples in the training data: {:.4f}".
                    format(pos_prop))
        class_weight = None
        if self.balanced_classification:
            # Re-weight only when the classes are noticeably imbalanced; weights are inverse class proportions
            if (pos_prop < 0.45) or (pos_prop > 0.55):
                class_weight = {0: 1.0 / (1 - pos_prop),
                                1: 1.0 / pos_prop}
                logger.info("Balancing the classes by assigning sample weight {:.4f} to class 0 and sample weight "
                            "{:.4f} to class 1".format(class_weight[0], class_weight[1]))

        self.model_logistic = LogisticRegressionCV(
            Cs=self.c_search_values,
            cv=self.n_cv_folds,
            penalty='l2',
            scoring='roc_auc',
            multi_class='auto',
            class_weight=class_weight,
            max_iter=self.max_iter,
            refit=True,
            n_jobs=self.n_jobs,
            random_state=self.seed_rng
        ).fit(features_lid, labels_bin)

        # Larger values of this score correspond to a higher probability of predicting class 1 (adversarial).
        # The unshuffled, unscaled per-category features are scaled here so the scores keep the input order.
        scores_normal = self.model_logistic.decision_function(self.scaler.transform(features_lid_normal))
        scores_adversarial = self.model_logistic.decision_function(self.scaler.transform(features_lid_adversarial))
        if noisy_data:
            scores_noisy = self.model_logistic.decision_function(self.scaler.transform(features_lid_noisy))
            return self, scores_normal, scores_adversarial, scores_noisy
        else:
            return self, scores_normal, scores_adversarial
コード例 #5
0
    def score(self, layer_embeddings, labels_pred, return_corrected_predictions=False, start_layer=0,
              test_layer_pairs=True, is_train=False):
        """
        Score test samples on how likely they are to be adversarial or out-of-distribution (OOD), given their
        layer embeddings (possibly including the input itself) and the classes predicted by the DNN. Larger
        scores correspond to a higher probability that the sample is adversarial or OOD, so the scores can be
        thresholded (e.g. to meet a target false positive rate).

        The per-layer test statistics and p-values are obtained from `self.test_stats_models[i].score`, and are
        then combined by the scoring method selected through `self.score_type` ('density', 'pvalue' or 'klpe').

        :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list must equal
                                 `self.n_layers`. The numpy array at index `i` has shape `(n, d_i)`, where `n`
                                 is the number of samples and `d_i` is the dimension of the embeddings at layer `i`.
        :param labels_pred: numpy array of class predictions made by the DNN.
        :param return_corrected_predictions: Set to True to additionally return the corrected class predictions
                                             produced by the scoring method. Note that this changes the returned
                                             values.
        :param start_layer: Starting index of the layers to include in the p-value fusion; only forwarded to the
                            p-value based scoring. Set to 0 to include all the layers. Negative values such as
                            -1, -2, -3 follow the python indexing convention, e.g. `-3` means the last 3 layers.
        :param test_layer_pairs: Set to True in order to also estimate p-values for the test statistics from all
                                 pairs of layers. Only used when `self.score_type == 'pvalue'`.
        :param is_train: Set to True if the inputs are the same non-adversarial inputs used with the `fit` method.

        :return: (scores [, corrected_classes])
            - scores: numpy array of scores for detection or ranking. The array should have shape
                      `(labels_pred.shape[0], )` and larger values correspond to a higher probability that the
                      sample is adversarial or OOD. The OOD score is returned if `self.ood_detection = True`.
            # returned only if `return_corrected_predictions = True`
            - corrected_classes: numpy array of the corrected class predictions. Has same shape and dtype as the
                                 array `labels_pred`.
        :raises ValueError: if the number of layers in the input does not match `self.n_layers`, or if
                            `self.score_type` is not one of the supported values.
        """
        n_test = labels_pred.shape[0]
        n_layers_in = len(layer_embeddings)
        if n_layers_in != self.n_layers:
            raise ValueError("Expecting {:d} layers in the input data, but received {:d}".
                             format(self.n_layers, n_layers_in))

        # Bootstrap p-value estimation is skipped only for the 'density' and 'klpe' scores without top-ranking,
        # because the estimated p-values are never used in that case
        bootstrap = not (self.score_type in ('density', 'klpe') and not self.use_top_ranked)

        # Per-layer test statistics and p-values, conditioned on the predicted class ...
        test_stats_pred = np.zeros((n_test, self.n_layers))
        pvalues_pred = np.zeros((n_test, self.n_layers))
        # ... and conditioned on each candidate true class
        test_stats_true = {c: np.zeros((n_test, self.n_layers)) for c in self.labels_unique}
        pvalues_true = {c: np.zeros((n_test, self.n_layers)) for c in self.labels_unique}
        for i in range(self.n_layers):
            # Optional dimension reduction of the embeddings from layer `i`
            data_proj = (transform_data_from_model(layer_embeddings[i], self.transform_models[i])
                         if self.transform_models else layer_embeddings[i])

            # Column 0 corresponds to the predicted class; columns 1.. follow the order of `self.labels_unique`
            stats_layer, pvals_layer = self.test_stats_models[i].score(data_proj, labels_pred, is_train=is_train,
                                                                       bootstrap=bootstrap)
            test_stats_pred[:, i] = stats_layer[:, 0]
            pvalues_pred[:, i] = pvals_layer[:, 0]
            for col, c in enumerate(self.labels_unique, start=1):
                test_stats_true[c][:, i] = stats_layer[:, col]
                pvalues_true[c][:, i] = pvals_layer[:, col]

        if self.use_top_ranked:
            # Keep only the top-ranked layers: reversed ranking for the statistics conditioned on the predicted
            # class, default ranking for the statistics conditioned on each candidate true class
            test_stats_pred, pvalues_pred = self._get_top_ranked(test_stats_pred, pvalues_pred, reverse=True)
            for c in self.labels_unique:
                test_stats_true[c], pvalues_true[c] = self._get_top_ranked(test_stats_true[c], pvalues_true[c])

        # Adversarial and OOD scores for the test samples, plus the corrected class predictions
        if self.score_type in ('density', 'klpe'):
            # Both of these methods consume the test statistics through the same call signature
            scorer = self._score_density_based if self.score_type == 'density' else self._score_klpe
            scores_adver, scores_ood, corrected_classes = scorer(
                labels_pred, test_stats_pred, test_stats_true,
                return_corrected_predictions=return_corrected_predictions
            )
        elif self.score_type == 'pvalue':
            if test_layer_pairs:
                # Number of distinct layer pairs
                n_pairs = (self.n_layers * (self.n_layers - 1)) // 2
                pvalues_pred_pairs = np.zeros((n_test, n_pairs))
                for c in self.labels_unique:
                    # Samples predicted into class `c`
                    ind = np.where(labels_pred == c)[0]
                    pvalues_pred_pairs[ind, :] = pvalue_score_all_pairs(
                        self.test_stats_pred_null[c], test_stats_pred[ind, :], log_transform=True, bootstrap=bootstrap
                    )
                    pairs_true_c = pvalue_score_all_pairs(
                        self.test_stats_true_null[c], test_stats_true[c], log_transform=True, bootstrap=bootstrap
                    )
                    # Append the layer-pair p-values as extra columns for candidate class `c`
                    pvalues_true[c] = np.hstack((pvalues_true[c], pairs_true_c))

                # Append the layer-pair p-values as extra columns for the predicted class
                pvalues_pred = np.hstack((pvalues_pred, pvalues_pred_pairs))

            scores_adver, scores_ood, corrected_classes = self._score_pvalue_based(
                labels_pred, pvalues_pred, pvalues_true,
                return_corrected_predictions=return_corrected_predictions, start_layer=start_layer
            )
        else:
            raise ValueError("Invalid score type '{}'".format(self.score_type))

        # Select the score matching the configured detection task
        scores = scores_ood if self.ood_detection else scores_adver
        if return_corrected_predictions:
            return scores, corrected_classes

        return scores
コード例 #6
0
    def fit(self, layer_embeddings, labels, labels_pred, **kwargs):
        """
        Estimate parameters of the detection method given natural (non-adversarial) input data.
        NOTE: Inputs to this method can be obtained by calling the function `extract_layer_embeddings`.

        :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal to
                                 the number of layers. The numpy array at index `i` has shape `(n, d_i)`, where `n`
                                 is the number of samples and `d_i` is the dimension of the embeddings at layer `i`.
        :param labels: numpy array of labels for the classification problem addressed by the DNN. Should have shape
                       `(n, )`, where `n` is the number of samples.
        :param labels_pred: numpy array of class predictions made by the DNN. Should have the same shape as `labels`.
        :param kwargs: dict with additional keyword arguments that can be passed to the `fit` method of the test
                       statistic class.

        :return: Instance of the class with all parameters fit to the data.
        """
        self.n_layers = len(layer_embeddings)
        self.labels_unique = np.unique(labels)
        self.n_classes = len(self.labels_unique)
        self.n_samples = labels.shape[0]

        logger.info("Number of classes: {:d}.".format(self.n_classes))
        logger.info("Number of layer embeddings: {:d}.".format(self.n_layers))
        logger.info("Number of samples: {:d}.".format(self.n_samples))
        logger.info("Test statistic calculated at each layer: {}.".format(self.layer_statistic))
        if labels_pred.shape[0] != self.n_samples:
            raise ValueError("Inputs 'labels' and 'labels_pred' do not have the same size.")

        if not all([layer_embeddings[i].shape[0] == self.n_samples for i in range(self.n_layers)]):
            raise ValueError("Input 'layer_embeddings' does not have the expected format")

        if self.use_top_ranked:
            if self.num_top_ranked > self.n_layers:
                logger.warning("Number of top-ranked layer statistics cannot be larger than the number of layers. "
                               "Setting it equal to the number of layers ({:d}).".format(self.n_layers))
                self.num_top_ranked = self.n_layers

        self.log_class_priors = np.zeros(self.n_classes)
        indices_true = dict()
        indices_pred = dict()
        test_stats_true = dict()
        pvalues_true = dict()
        test_stats_pred = dict()
        pvalues_pred = dict()
        for c in self.labels_unique:
            indices_true[c] = np.where(labels == c)[0]
            indices_pred[c] = np.where(labels_pred == c)[0]
            # Test statistics and negative log p-values across the layers for the samples labeled into class `c`
            test_stats_true[c] = np.zeros((indices_true[c].shape[0], self.n_layers))
            pvalues_true[c] = np.zeros((indices_true[c].shape[0], self.n_layers))

            # Test statistics and negative log p-values across the layers for the samples predicted into class `c`
            test_stats_pred[c] = np.zeros((indices_pred[c].shape[0], self.n_layers))
            pvalues_pred[c] = np.zeros((indices_pred[c].shape[0], self.n_layers))

            # Log of the class prior probability
            self.log_class_priors[c] = indices_true[c].shape[0]

        self.log_class_priors = np.log(self.log_class_priors) - np.log(self.n_samples)

        for i in range(self.n_layers):
            if self.transform_models:
                logger.info("Transforming the embeddings from layer {:d}.".format(i + 1))
                data_proj = transform_data_from_model(layer_embeddings[i], self.transform_models[i])
                logger.info("Input dimension = {:d}, projected dimension = {:d}".
                            format(layer_embeddings[i].shape[1], data_proj.shape[1]))
            else:
                data_proj = layer_embeddings[i]

            logger.info("Parameter estimation and test statistics calculation for layer {:d}:".format(i + 1))
            ts_obj = None
            # Bootstrap p-values are used only if `self.use_top_ranked = True` because in this case the test
            # statistics across the layers are ranked based on the p-values
            kwargs_fit = {'bootstrap': self.use_top_ranked}
            if self.layer_statistic == 'multinomial':
                ts_obj = MultinomialScore(
                    neighborhood_constant=self.neighborhood_constant,
                    n_neighbors=self.n_neighbors,
                    metric=self.metric,
                    metric_kwargs=self.metric_kwargs,
                    shared_nearest_neighbors=False,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
                if 'combine_low_proba_classes' in kwargs:
                    kwargs_fit['combine_low_proba_classes'] = kwargs['combine_low_proba_classes']
                if 'n_classes_multinom' in kwargs:
                    kwargs_fit['n_classes_multinom'] = kwargs['n_classes_multinom']

            elif self.layer_statistic == 'binomial':
                ts_obj = BinomialScore(
                    neighborhood_constant=self.neighborhood_constant,
                    n_neighbors=self.n_neighbors,
                    metric=self.metric,
                    metric_kwargs=self.metric_kwargs,
                    shared_nearest_neighbors=False,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
            elif self.layer_statistic == 'lid':
                ts_obj = LIDScore(
                    neighborhood_constant=self.neighborhood_constant,
                    n_neighbors=self.n_neighbors,
                    metric='euclidean',     # use 'euclidean' metric for LID estimation
                    metric_kwargs=None,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
            elif self.layer_statistic == 'lle':
                ts_obj = LLEScore(
                    neighborhood_constant=self.neighborhood_constant,
                    n_neighbors=self.n_neighbors,
                    metric=self.metric,
                    metric_kwargs=self.metric_kwargs,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
            elif self.layer_statistic == 'distance':
                ts_obj = DistanceScore(
                    neighborhood_constant=self.neighborhood_constant,
                    n_neighbors=self.n_neighbors,
                    metric=self.metric,
                    metric_kwargs=self.metric_kwargs,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
            elif self.layer_statistic == 'trust':
                ts_obj = TrustScore(
                    neighborhood_constant=self.neighborhood_constant,
                    n_neighbors=self.n_neighbors,
                    metric=self.metric,
                    metric_kwargs=self.metric_kwargs,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )

            test_stats_temp, pvalues_temp = ts_obj.fit(
                data_proj, labels, labels_pred, labels_unique=self.labels_unique, **kwargs_fit
            )
            '''
            - `test_stats_temp` will be a numpy array of shape `(self.n_samples, self.n_classes + 1)` with a vector 
            of test statistics for each sample.
            The first column `test_stats_temp[:, 0]` gives the scores conditioned on the predicted class.
            The remaining columns `test_stats_temp[:, i]` for `i = 1, 2, . . .` gives the scores conditioned on 
            `i - 1` being the candidate true class for the sample.
            - `pvalues_temp` is also a numpy array of the same shape with the negative log transformed p-values 
            corresponding to the test statistics.
            '''
            self.test_stats_models.append(ts_obj)
            for j, c in enumerate(self.labels_unique):
                # Test statistics and negative log p-values from layer `i`
                test_stats_pred[c][:, i] = test_stats_temp[indices_pred[c], 0]
                pvalues_pred[c][:, i] = pvalues_temp[indices_pred[c], 0]
                test_stats_true[c][:, i] = test_stats_temp[indices_true[c], j + 1]
                pvalues_true[c][:, i] = pvalues_temp[indices_true[c], j + 1]

        for c in self.labels_unique:
            if self.use_top_ranked:
                logger.info("Using the test statistics corresponding to the smallest (largest) {:d} p-values "
                            "conditioned on the predicted (true) class.".format(self.num_top_ranked))
                # For the test statistics conditioned on the predicted class, take the largest
                # `self.num_top_ranked` negative log-transformed p-values across the layers
                test_stats_pred[c], pvalues_pred[c] = self._get_top_ranked(
                    test_stats_pred[c], pvalues_pred[c], reverse=True
                )
                # For the test statistics conditioned on the true class, take the smallest `self.num_top_ranked`
                # negative log-transformed p-values across the layers
                test_stats_true[c], pvalues_true[c] = self._get_top_ranked(test_stats_true[c], pvalues_true[c])

            if self.score_type == 'density':
                logger.info("Learning a joint probability density model for the test statistics conditioned on the "
                            "predicted class '{}':".format(c))
                logger.info("Number of samples = {:d}, dimension = {:d}".format(*test_stats_pred[c].shape))
                self.density_models_pred[c] = train_log_normal_mixture(test_stats_pred[c], seed_rng=self.seed_rng)

                # Negative log density of the data used to fit the model
                arr1 = -1. * score_log_normal_mixture(test_stats_pred[c], self.density_models_pred[c],
                                                      log_transform=True)
                # Generate a large number of random samples from the model
                test_stats_rand_sample, _ = self.density_models_pred[c].sample(n_samples=NUM_RANDOM_SAMPLES)
                # Negative log density of the generated random samples. Log transformation is not needed since the
                # samples are generated from the model
                arr2 = -1. * score_log_normal_mixture(test_stats_rand_sample, self.density_models_pred[c],
                                                      log_transform=False)
                self.samples_neg_log_dens_pred[c] = np.concatenate([arr1, arr2])
                logger.info("Number of log-density sample values used for estimating p-values: {:d}".
                            format(self.samples_neg_log_dens_pred[c].shape[0]))

                logger.info("Learning a joint probability density model for the test statistics conditioned on the "
                            "true class '{}':".format(c))
                logger.info("Number of samples = {:d}, dimension = {:d}".format(*test_stats_true[c].shape))
                self.density_models_true[c] = train_log_normal_mixture(test_stats_true[c], seed_rng=self.seed_rng)

                # Negative log density of the data used to fit the model
                arr1 = -1. * score_log_normal_mixture(test_stats_true[c], self.density_models_true[c],
                                                      log_transform=True)
                # Generate a large number of random samples from the model
                test_stats_rand_sample, _ = self.density_models_true[c].sample(n_samples=NUM_RANDOM_SAMPLES)
                # Negative log density of the generated random samples
                arr2 = -1. * score_log_normal_mixture(test_stats_rand_sample, self.density_models_true[c],
                                                      log_transform=False)
                self.samples_neg_log_dens_true[c] = np.concatenate([arr1, arr2])
                logger.info("Number of log-density sample values used for estimating p-values: {:d}".
                            format(self.samples_neg_log_dens_true[c].shape[0]))

            if self.score_type == 'klpe':
                # Not setting the number of neighbors here. This will be automatically set based on the number of
                # samples per class
                kwargs_lpe = {
                    'neighborhood_constant': self.neighborhood_constant,
                    'metric': self.metric,
                    'metric_kwargs': self.metric_kwargs,
                    'approx_nearest_neighbors': self.approx_nearest_neighbors,
                    'n_jobs': self.n_jobs,
                    'seed_rng': self.seed_rng
                }
                logger.info("Fitting the localized p-value estimation model for the test statistics conditioned on "
                            "the predicted class {}:".format(c))
                self.klpe_models_pred[c] = averaged_KLPE_anomaly_detection(**kwargs_lpe)
                self.klpe_models_pred[c].fit(test_stats_pred[c])

                logger.info("Fitting the localized p-value estimation model for the test statistics conditioned on "
                            "the true class {}:".format(c))
                self.klpe_models_true[c] = averaged_KLPE_anomaly_detection(**kwargs_lpe)
                self.klpe_models_true[c].fit(test_stats_true[c])

        self.test_stats_pred_null = test_stats_pred
        self.test_stats_true_null = test_stats_true
        return self
コード例 #7
0
    def score(self, layer_embeddings, is_train=False):
        """
        Calculate the anomaly score and the corrected deep kNN class prediction for each input sample.

        For each candidate class, the nonconformity of a sample is accumulated over the layers as the number of
        nearest-neighbor label counts that fall outside that class. Each nonconformity value is then converted
        into a p-value, defined as the fraction of the calibration nonconformity scores
        (`self.nonconformity_calib`) that are greater than or equal to it.

        :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal to
                                 the number of layers. The numpy array at index `i` has shape `(n, d_i)`, where `n`
                                 is the number of samples and `d_i` is the dimension of the embeddings at layer `i`.
        :param is_train: Set to True if the inputs are the same non-adversarial inputs used with the `fit` method.
                         In this case the KNN index of each layer is queried with `query_self` instead of `query`.

        :return: (scores, predictions)
            - scores: numpy array of scores corresponding to OOD or adversarial detection. It is the negative log
                      of the credibility scores. So high values of this score correspond to low credibility (i.e.
                      high probability of an outlier).
            - predictions: numpy array of the corrected deep kNN class predictions. Has the same shape as `scores`.
        :raises ValueError: if the number of layer embeddings differs from the number of layers seen by `fit`.
        """
        # Validate the number of layers before indexing into the list, so that an empty or short input
        # produces the descriptive error below
        n_layers_in = len(layer_embeddings)
        if n_layers_in != self.n_layers:
            raise ValueError(
                "Expecting {:d} layers in the input data, but received {:d}".
                format(self.n_layers, n_layers_in))

        n_test = layer_embeddings[0].shape[0]
        nonconformity_per_class = np.zeros((n_test, self.n_classes))
        for i in range(self.n_layers):
            if self.transform_models:
                # Dimension reduction
                data_proj = transform_data_from_model(layer_embeddings[i],
                                                      self.transform_models[i])
            else:
                data_proj = layer_embeddings[i]

            # Indices of the nearest neighbors of each test sample
            if is_train:
                nn_indices, _ = self.index_knn[i].query_self(
                    k=self.n_neighbors)
            else:
                nn_indices, _ = self.index_knn[i].query(data_proj,
                                                        k=self.n_neighbors)

            # Class label counts among the nearest neighbors
            _, nc_counts = neighbors_label_counts(nn_indices,
                                                  self.labels_train_enc,
                                                  self.n_classes)

            for j in range(self.n_classes):
                # Nonconformity w.r.t the class at index `j` from layer `i`: sum of the neighborhood counts
                # of all classes except that class
                nonconformity_per_class[:, j] += np.sum(
                    nc_counts[:, self.mask_exclude[j, :]], axis=1)

        # Calculate the p-values per-class with respect to the non-conformity scores of the calibration set.
        # With the calibration scores sorted in ascending order, `searchsorted(..., side='left')` returns the
        # number of calibration scores strictly smaller than each query value. The remaining ones are `>=` the
        # query value. This gives the same counts as a pairwise comparison without materializing a boolean
        # matrix of shape `(n_calib, n_test * n_classes)`.
        calib_sorted = np.sort(self.nonconformity_calib)
        n_calib = calib_sorted.shape[0]
        n_smaller = np.searchsorted(calib_sorted, nonconformity_per_class, side='left')
        p_values = (n_calib - n_smaller) / float(n_calib)

        # Credibility is the maximum p-value over all classes
        credibility = np.max(p_values, axis=1)
        # Anomaly score. Clipping avoids taking the log of zero
        scores = -np.log(np.clip(credibility, sys.float_info.min, None))
        # Deep k-NN prediction is the class corresponding to the largest p-value
        predictions = np.array(
            [self.labels_unique[j] for j in np.argmax(p_values, axis=1)],
            dtype=self.labels_unique.dtype)

        return scores, predictions
コード例 #8
0
    def fit(self, layer_embeddings, labels):
        """
        Estimate parameters of the detection method given natural (non-adversarial) input data. Note that this
        data should be different from that used to train the DNN classifier.
        NOTE: Inputs to this method can be obtained by calling the function `extract_layer_embeddings`.

        A KNN index is built per layer (stored in `self.index_knn`), and the calibration nonconformity score of
        each sample (stored in `self.nonconformity_calib`) is accumulated over the layers as the number of
        nearest-neighbor label counts that fall outside the sample's own class.

        :param layer_embeddings: list of numpy arrays with the layer embedding data. Length of the list is equal to
                                 the number of layers. The numpy array at index `i` has shape `(n, d_i)`, where `n`
                                 is the number of samples and `d_i` is the dimension of the embeddings at layer `i`.
        :param labels: numpy array of labels for the classification problem addressed by the DNN. Should have shape
                       `(n, )`, where `n` is the number of samples.
        :return: Instance of the class with all parameters fit to the data.
        :raises ValueError: if any layer embedding does not have the same number of samples as `labels`.
        """
        self.n_layers = len(layer_embeddings)
        self.labels_unique = np.unique(labels)
        self.n_classes = len(self.labels_unique)
        self.n_samples = labels.shape[0]
        # Mapping from the original labels to the set {0, 1, . . .,self.n_classes - 1}. This is needed by the label
        # count function
        d = dict(zip(self.labels_unique, np.arange(self.n_classes)))
        self.label_encoder = np.vectorize(d.__getitem__)

        # Number of nearest neighbors
        if self.n_neighbors is None:
            # Set number of nearest neighbors based on the data size and the neighborhood constant
            self.n_neighbors = int(
                np.ceil(self.n_samples**self.neighborhood_constant))

        logger.info("Number of classes: {:d}.".format(self.n_classes))
        logger.info("Number of layer embeddings: {:d}.".format(self.n_layers))
        logger.info("Number of samples: {:d}.".format(self.n_samples))
        logger.info("Number of neighbors: {:d}.".format(self.n_neighbors))
        if not all(emb.shape[0] == self.n_samples for emb in layer_embeddings):
            raise ValueError(
                "Input 'layer_embeddings' does not have the expected format")

        self.labels_train_enc = self.label_encoder(labels)
        # Row `j` of the mask selects every class column except the class at index `j`.
        # The builtin `bool` is used because the `np.bool` alias was removed from recent NumPy releases.
        self.mask_exclude = np.ones((self.n_classes, self.n_classes),
                                    dtype=bool)
        np.fill_diagonal(self.mask_exclude, False)
        # Index of labeled samples from each class
        indices_true = {c: np.where(labels == c)[0] for c in self.labels_unique}

        self.nonconformity_calib = np.zeros(self.n_samples)
        self.index_knn = [None for _ in range(self.n_layers)]
        for i in range(self.n_layers):
            logger.info("Processing layer {:d}:".format(i + 1))
            if self.transform_models:
                logger.info(
                    "Transforming the embeddings from layer {:d}.".format(i +
                                                                          1))
                data_proj = transform_data_from_model(layer_embeddings[i],
                                                      self.transform_models[i])
                logger.info(
                    "Input dimension = {:d}, projected dimension = {:d}".
                    format(layer_embeddings[i].shape[1], data_proj.shape[1]))
            else:
                data_proj = layer_embeddings[i]

            logger.info("Building a KNN index for nearest neighbor queries.")
            # Build a KNN index on the set of feature embeddings from normal samples from layer `i`
            self.index_knn[i] = KNNIndex(
                data_proj,
                n_neighbors=self.n_neighbors,
                metric=self.metric,
                metric_kwargs=self.metric_kwargs,
                approx_nearest_neighbors=self.approx_nearest_neighbors,
                n_jobs=self.n_jobs,
                low_memory=self.low_memory,
                seed_rng=self.seed_rng)
            # Indices of the nearest neighbors of each sample
            nn_indices, _ = self.index_knn[i].query_self(k=self.n_neighbors)
            logger.info(
                "Calculating the class label counts and non-conformity scores in the neighborhood of "
                "each sample.")
            _, nc_counts = neighbors_label_counts(nn_indices,
                                                  self.labels_train_enc,
                                                  self.n_classes)

            for j, c in enumerate(self.labels_unique):
                # Neighborhood counts of all classes except `c`
                nc_counts_slice = nc_counts[:, self.mask_exclude[j, :]]
                # Nonconformity from layer `i` for all labeled samples from class `c`
                self.nonconformity_calib[indices_true[c]] += np.sum(
                    nc_counts_slice[indices_true[c], :], axis=1)

        return self
コード例 #9
0
    def fit(self, data, labels, labels_pred):
        """
        Estimate the `1 - alpha` density level sets for each class using the given data, with true labels and
        classifier-predicted labels. This will be used to calculate the trust score.

        For each class, a KNN index is built on the samples of that class (stored in `self.index_knn`). If
        `self.alpha > 0`, samples whose k-th nearest neighbor distance exceeds the `100 * (1 - alpha)` percentile
        (stored in `self.epsilon`) are excluded and the index is rebuilt on the remaining samples. The trust
        scores of the estimation data are stored in `self.scores_estim`.

        :param data: numpy array with the feature vectors of shape `(n, d)`, where `n` and `d` are the number of
                     samples and the data dimension respectively.
        :param labels: numpy array of labels for the classification problem addressed by the DNN. Should have shape
                       `(n, )`, where `n` is the number of samples.
        :param labels_pred: numpy array similar to `labels`, but with the classes predicted by the classifier.

        :return: Instance of the class with all parameters fit to the data.
        """
        self.n_samples, dim = data.shape
        self.labels_unique = np.unique(labels)
        self.n_classes = len(self.labels_unique)
        if self.n_neighbors is None:
            # Set number of nearest neighbors based on the maximum number of samples per class and the neighborhood
            # constant
            num = max((int(np.count_nonzero(labels == c)) for c in self.labels_unique), default=0)
            self.n_neighbors = int(np.ceil(num ** self.neighborhood_constant))

        logger.info("Number of samples: {:d}. Data dimension = {:d}.".format(self.n_samples, dim))
        logger.info("Number of classes: {:d}.".format(self.n_classes))
        logger.info("Number of neighbors (k): {:d}.".format(self.n_neighbors))
        logger.info("Fraction of outliers (alpha): {:.4f}.".format(self.alpha))
        if self.model_dim_reduction:
            data = transform_data_from_model(data, self.model_dim_reduction)
            dim = data.shape[1]
            logger.info("Applying dimension reduction to the data. Projected dimension = {:d}.".format(dim))

        # Distance from each sample in `data` to the `1 - alpha` level sets corresponding to each class
        distance_level_sets = np.zeros((self.n_samples, self.n_classes))
        self.index_knn = dict()
        self.epsilon = dict()
        indices_sub = dict()
        for j, c in enumerate(self.labels_unique):
            logger.info("Processing data from class '{}':".format(c))
            logger.info("Building a KNN index for all the samples from class '{}'.".format(c))
            indices_sub[c] = np.where(labels == c)[0]
            data_sub = data[indices_sub[c], :]
            self.index_knn[c] = KNNIndex(
                data_sub, n_neighbors=self.n_neighbors,
                metric=self.metric, metric_kwargs=self.metric_kwargs,
                approx_nearest_neighbors=self.approx_nearest_neighbors,
                n_jobs=self.n_jobs,
                low_memory=self.low_memory,
                seed_rng=self.seed_rng
            )
            # Distances to the k nearest neighbors of each sample
            _, nn_distances = self.index_knn[c].query_self(k=self.n_neighbors)
            # Radius or distance to the k-th nearest neighbor for each sample
            radius_arr = nn_distances[:, self.n_neighbors - 1]

            # Smallest radius `epsilon` such that only `alpha` fraction of the samples from class `c` have radius
            # greater than `epsilon`
            if self.alpha > 0.:
                q = 100 * (1 - self.alpha)
                try:
                    # Newer NumPy releases name this keyword `method`; `interpolation` is deprecated there
                    self.epsilon[c] = np.percentile(radius_arr, q, method='midpoint')
                except TypeError:
                    # Older NumPy releases only accept the `interpolation` keyword
                    self.epsilon[c] = np.percentile(radius_arr, q, interpolation='midpoint')

                # Exclude the outliers and build a KNN index with the remaining samples
                mask_incl = radius_arr <= self.epsilon[c]
            else:
                # Slightly larger value than the largest radius
                self.epsilon[c] = 1.0001 * np.max(radius_arr)

                # All samples are included in the density level set.
                # The builtin `bool` is used because the `np.bool` alias was removed from recent NumPy releases.
                mask_incl = np.ones(indices_sub[c].shape[0], dtype=bool)

            mask_excl = np.logical_not(mask_incl)
            num_excl = int(np.count_nonzero(mask_excl))
            if num_excl:
                logger.info("Excluding {:d} samples with radius larger than {:.6f} and building a KNN index with "
                            "the remaining samples.".format(num_excl, self.epsilon[c]))
                self.index_knn[c] = KNNIndex(
                    data_sub[mask_incl, :], n_neighbors=self.n_neighbors,
                    metric=self.metric, metric_kwargs=self.metric_kwargs,
                    approx_nearest_neighbors=self.approx_nearest_neighbors,
                    n_jobs=self.n_jobs,
                    low_memory=self.low_memory,
                    seed_rng=self.seed_rng
                )
                # Distance to the nearest neighbor of each sample that is part of the KNN index
                _, dist_temp = self.index_knn[c].query_self(k=1)
                ind = indices_sub[c][mask_incl]
                distance_level_sets[ind, j] = dist_temp[:, 0]

                # Distance to the nearest neighbor of each sample that is not a part of the KNN index (outliers)
                _, dist_temp = self.index_knn[c].query(data_sub[mask_excl, :], k=1)
                ind = indices_sub[c][mask_excl]
                distance_level_sets[ind, j] = dist_temp[:, 0]
            else:
                # No need to rebuild the KNN index because no samples are excluded.
                # Distance to the nearest neighbor of each sample
                distance_level_sets[indices_sub[c], j] = nn_distances[:, 0]

        logger.info("Calculating the trust score for the estimation data.")
        for c in self.labels_unique:
            # Compute the distance from each sample from class `c` to the level sets from the remaining classes
            data_sub = data[indices_sub[c], :]
            for j, c_hat in enumerate(self.labels_unique):
                if c_hat == c:
                    # The distance to the sample's own class was already filled in by the loop above
                    continue

                _, dist_temp = self.index_knn[c_hat].query(data_sub, k=1)
                distance_level_sets[indices_sub[c], j] = dist_temp[:, 0]

        self.scores_estim = self._score_helper(distance_level_sets, labels_pred)
        return self