def _get_decision_scores(self, X):
    """Compute ensemble outlier scores for the samples in X.

    For each test sample, the training samples in its local region are
    looked up, every base estimator's standardized training scores on
    that region are correlated (Pearson) with a pseudo ground truth, and
    the standardized test scores of the detectors returned by
    ``self._get_competent_detectors`` are averaged.

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        Test data. It is passed to the base estimators as-is; no
        normalisation of X happens inside this method.

    Returns
    -------
    pred_scores_ens : numpy array, shape (n_samples,)
        Ensemble outlier score of every test sample.

    Notes
    -----
    This method has side effects on the instance:
    ``self.local_region_size`` is clamped into
    ``[self.local_region_min, self.local_region_max]`` and
    ``self.training_pseudo_label_`` is re-assigned on every call.
    """
    # Force the local region size into the accepted interval before the
    # local regions are computed.
    lower_bounded = max(self.local_region_size, self.local_region_min)
    self.local_region_size = min(lower_bounded, self.local_region_max)

    # One entry per test sample; each entry is used below to index rows
    # of the training score matrix.
    local_regions = self._get_local_region(X)

    # Raw outlier scores: one row per test sample, one column per
    # base estimator.
    n_samples = X.shape[0]
    raw_test_scores = np.zeros((n_samples, self.n_clf))
    for col, base_estimator in enumerate(self.estimator_list):
        raw_test_scores[:, col] = base_estimator.decision_function(X)

    # Bring training and test scores onto a common scale.
    train_norm, test_norm = standardizer(self.train_scores_,
                                         raw_test_scores)

    # Pseudo ground truth: per training sample, the maximum standardized
    # score over all estimators, stored as a column vector.
    per_sample_max = np.max(train_norm, axis=1)
    self.training_pseudo_label_ = per_sample_max.reshape(-1, 1)
    pseudo_target = self.training_pseudo_label_

    ensemble_scores = np.zeros((n_samples,))
    for row, region in enumerate(local_regions):
        # Restrict pseudo target and training scores to the local region.
        region_target = pseudo_target[region, :].ravel()
        region_train_scores = train_norm[region, :]

        # Pearson correlation of each estimator's local training scores
        # with the local pseudo target (float64, one value per estimator).
        correlations = np.array(
            [pearsonr(region_target, region_train_scores[:, j])[0]
             for j in range(self.n_clf)],
            dtype=np.float64)

        # Average the test scores of the selected detectors only.
        selected = self._get_competent_detectors(correlations)
        ensemble_scores[row] = np.mean(test_norm[row, selected])

    return ensemble_scores
def _get_decision_scores(self, X):
    """Compute ensemble outlier scores for test data X.

    The model must already be fit: the method reads
    ``self.train_scores_`` and ``self.detector_list``.

    For each test sample, the training samples in its local region are
    looked up, every detector's standardized training scores on that
    region are correlated (Pearson) with a pseudo ground truth, and the
    standardized test scores of the detectors returned by
    ``self._get_competent_detectors`` are averaged.

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        Test data. It is passed to the detectors as-is; no normalisation
        of X happens inside this method.

    Returns
    -------
    pred_scores_ens : numpy array, shape (n_samples,)
        Outlier scores for test samples.

    Notes
    -----
    A warning is issued (the size itself is left untouched) when
    ``self.local_region_size`` lies outside
    ``[self.local_region_min, self.local_region_max]``.
    ``self.training_pseudo_label_`` is re-assigned on every call.
    """
    # Only warn about an out-of-range local region size; do not modify it.
    region_size = self.local_region_size
    too_small = region_size < self.local_region_min
    too_large = region_size > self.local_region_max
    if too_small or too_large:
        message = ("Local region size of {} is outside "
                   "recommended range [{}, {}]")
        warnings.warn(message.format(self.local_region_size,
                                     self.local_region_min,
                                     self.local_region_max))

    # One entry per test sample; each entry is used below to index rows
    # of the training score matrix.
    local_regions = self._get_local_region(X)

    # Raw outlier scores: one row per test sample, one column per detector.
    n_samples = X.shape[0]
    raw_test_scores = np.zeros((n_samples, self.n_clf))
    for col, base_detector in enumerate(self.detector_list):
        raw_test_scores[:, col] = base_detector.decision_function(X)

    # Bring training and test scores onto a common scale.
    train_norm, test_norm = standardizer(self.train_scores_,
                                         raw_test_scores)

    # Pseudo ground truth: per training sample, the maximum standardized
    # score over all detectors, stored as a column vector.
    per_sample_max = np.max(train_norm, axis=1)
    self.training_pseudo_label_ = per_sample_max.reshape(-1, 1)
    pseudo_target = self.training_pseudo_label_

    ensemble_scores = np.zeros((n_samples,))
    for row, region in enumerate(local_regions):
        # Restrict pseudo target and training scores to the local region.
        region_target = pseudo_target[region, :].ravel()
        region_train_scores = train_norm[region, :]

        # Pearson correlation of each detector's local training scores
        # with the local pseudo target (float64, one value per detector).
        correlations = np.array(
            [pearsonr(region_target, region_train_scores[:, j])[0]
             for j in range(self.n_clf)],
            dtype=np.float64)

        # Average the test scores of the selected detectors only.
        selected = self._get_competent_detectors(correlations)
        ensemble_scores[row] = np.mean(test_norm[row, selected])

    return ensemble_scores