Exemple #1
0
    def _get_decision_scores(self, X):

        # ensure local region size is within acceptable limits
        self.local_region_size = max(self.local_region_size, self.local_region_min)
        self.local_region_size = min(self.local_region_size, self.local_region_max)

        # standardize test data and get local region for each test instance
        X_test_norm = X
        ind_arr = self._get_local_region(X_test_norm)

        # calculate test scores
        test_scores = np.zeros([X_test_norm.shape[0], self.n_clf])
        for k, estimator in enumerate(self.estimator_list):
            test_scores[:, k] = estimator.decision_function(X_test_norm)

        # generate standardized scores
        train_scores_norm, test_scores_norm = standardizer(self.train_scores_, test_scores)

        # generate pseudo target for training --> for calculating weights
        self.training_pseudo_label_ = np.max(train_scores_norm, axis=1).reshape(-1, 1)

        # placeholder for predictions
        pred_scores_ens = np.zeros([X_test_norm.shape[0], ])

        # iterate through test instances (ind_arr indices correspond to x_test)
        for i, ind_k in enumerate(ind_arr):

            # get pseudo target and training scores in local region of test instance
            local_pseudo_ground_truth = self.training_pseudo_label_[ind_k,].ravel()
            local_train_scores = train_scores_norm[ind_k, :]

            # calculate pearson correlation between local pseudo ground truth and local train scores
            pearson_corr_scores = np.zeros([self.n_clf, ])
            for d in range(self.n_clf):
                pearson_corr_scores[d,] = pearsonr(local_pseudo_ground_truth, local_train_scores[:, d])[0]

            # return best score
            pred_scores_ens[i,] = np.mean(
                test_scores_norm[i, self._get_competent_detectors(pearson_corr_scores)])

        return pred_scores_ens
Exemple #2
0
    def _get_decision_scores(self, X):
        """ Helper function for getting outlier scores on test data X (note:
        model must already be fit)

        Parameters
        ----------
        X : numpy array, shape (n_samples, n_features)
            Test data

        Returns
        -------
        pred_scores_ens : numpy array, shape (n_samples,)
            Outlier scores for test samples
        """

        # raise warning if local region size is outside acceptable limits
        if (self.local_region_size < self.local_region_min) or (
                self.local_region_size > self.local_region_max):
            warnings.warn("Local region size of {} is outside "
                          "recommended range [{}, {}]".format(
                self.local_region_size, self.local_region_min,
                self.local_region_max))

        # standardize test data and get local region for each test instance
        X_test_norm = X
        test_local_regions = self._get_local_region(X_test_norm)

        # calculate test scores
        test_scores = np.zeros([X_test_norm.shape[0], self.n_clf])
        for k, detector in enumerate(self.detector_list):
            test_scores[:, k] = detector.decision_function(X_test_norm)

        # generate standardized scores
        train_scores_norm, test_scores_norm = standardizer(self.train_scores_,
                                                           test_scores)

        # generate pseudo target for training --> for calculating weights
        self.training_pseudo_label_ = np.max(train_scores_norm,
                                             axis=1).reshape(-1, 1)

        # placeholder for ensemble predictions
        pred_scores_ens = np.zeros([X_test_norm.shape[0], ])

        # iterate through test instances (test_local_regions
        # indices correspond to x_test)
        for i, test_local_region in enumerate(test_local_regions):

            # get pseudo target and training scores in local region of
            # test instance
            local_pseudo_ground_truth = self.training_pseudo_label_[
                test_local_region,].ravel()
            local_train_scores = train_scores_norm[test_local_region, :]

            # calculate pearson correlation between local pseudo ground truth
            # and local train scores
            pearson_corr_scores = np.zeros([self.n_clf, ])
            for d in range(self.n_clf):
                pearson_corr_scores[d,] = pearsonr(
                    local_pseudo_ground_truth, local_train_scores[:, d])[0]

            # return best score
            pred_scores_ens[i,] = np.mean(
                test_scores_norm[
                    i, self._get_competent_detectors(pearson_corr_scores)])

        return pred_scores_ens
Exemple #3
0
    def _get_decision_scores(self, X):
        """ Helper function for getting outlier scores on test data X (note:
        model must already be fit)

        Parameters
        ----------
        X : numpy array, shape (n_samples, n_features)
            Test data

        Returns
        -------
        pred_scores_ens : numpy array, shape (n_samples,)
            Outlier scores for test samples
        """

        # raise warning if local region size is outside acceptable limits
        if (self.local_region_size < self.local_region_min) or (
                self.local_region_size > self.local_region_max):
            warnings.warn("Local region size of {} is outside "
                          "recommended range [{}, {}]".format(
                self.local_region_size, self.local_region_min,
                self.local_region_max))

        # standardize test data and get local region for each test instance
        X_test_norm = X
        test_local_regions = self._get_local_region(X_test_norm)

        # calculate test scores
        test_scores = np.zeros([X_test_norm.shape[0], self.n_clf])
        for k, detector in enumerate(self.detector_list):
            test_scores[:, k] = detector.decision_function(X_test_norm)

        # generate standardized scores
        train_scores_norm, test_scores_norm = standardizer(self.train_scores_,
                                                           test_scores)

        # generate pseudo target for training --> for calculating weights
        self.training_pseudo_label_ = np.max(train_scores_norm,
                                             axis=1).reshape(-1, 1)

        # placeholder for ensemble predictions
        pred_scores_ens = np.zeros([X_test_norm.shape[0], ])

        # iterate through test instances (test_local_regions
        # indices correspond to x_test)
        for i, test_local_region in enumerate(test_local_regions):

            # get pseudo target and training scores in local region of
            # test instance
            local_pseudo_ground_truth = self.training_pseudo_label_[
                test_local_region,].ravel()
            local_train_scores = train_scores_norm[test_local_region, :]

            # calculate pearson correlation between local pseudo ground truth
            # and local train scores
            pearson_corr_scores = np.zeros([self.n_clf, ])
            for d in range(self.n_clf):
                pearson_corr_scores[d,] = pearsonr(
                    local_pseudo_ground_truth, local_train_scores[:, d])[0]

            # return best score
            pred_scores_ens[i,] = np.mean(
                test_scores_norm[
                    i, self._get_competent_detectors(pearson_corr_scores)])

        return pred_scores_ens