def reduce_samples(self, X, number_to_keep):
    """Compute the variance to the mean of samples for every sub-region and
    select the sub-regions with the lowest variance.

    The method doesn't consider the possibility that the number of
    sub-regions vary from image to image since it doesn't make sense to
    reduce the number of sub-regions in this case.

    Inputs:
        * X = a 3-D numpy array corresponding to machine learning features.
          The first dimension is linked to the image where the feature was
          computed, the second dimension is linked to the sub-regions to
          compute the features.
        * number_to_keep = number of samples (sub-regions) to keep.

    Parameters (read from self):
        * parallel = a boolean to activate parallel computation or not.
        * n_jobs = number of jobs to create for parallel computation.

    Outputs:
        * indexes = indexes corresponding to the data to keep.
    """
    # Basic check: nothing to reduce, so return ALL sub-region indexes.
    # (Bug fix: this branch previously returned X itself, which is
    # inconsistent with the documented contract of returning indexes and
    # would break callers that index into X with the result.)
    if number_to_keep >= X.shape[1]:
        logging.warning('The number of sub-region to keep is greater or equal ' + \
                        'to the number of sub-regions.')
        return np.arange(X.shape[1])

    # Compute one variance-to-the-mean value per sub-region.
    if self.parallel:
        variances_list = \
            Parallel(n_jobs=self.n_jobs)(
                delayed(compute_variance_one_region)(X[:, region_index, :],
                                                     self.distance_function,
                                                     self.distance_args,
                                                     self.mean_function,
                                                     self.mean_args)
                for region_index in range(X.shape[1]))
    else:
        variances_list = []
        # trange: sequential loop with a progress bar over sub-regions.
        for region_index in trange(X.shape[1]):
            variances_list.append(
                compute_variance_one_region(X[:, region_index, :],
                                            self.distance_function,
                                            self.distance_args,
                                            self.mean_function,
                                            self.mean_args))

    # Discard NaN values (if any) by assigning them +inf so they sort last
    # and are never selected. Force a float dtype: if every variance were
    # integral, np.array would build an int array and the np.inf assignment
    # below would raise.
    variances_list = np.asarray(variances_list, dtype=float)
    variances_list[np.isnan(variances_list)] = np.inf

    # Sort variances and keep only the number_to_keep lowest sub-regions.
    indexes = variances_list.argsort()
    return indexes[:number_to_keep]