def select_predictive_length(X: np.ndarray, A: np.ndarray, k: int,
                             optimality: str = 'A') -> Tuple[np.ndarray, float]:
    """Select ``k`` samples with probability proportional to their row norms.

    Each sample is weighted by the L2 norm of its feature vector and ``k``
    distinct indexes are drawn without replacement from that distribution.

    :param X: np.ndarray, matrix of features (num_samples x num_features)
    :param A: np.ndarray, prior precision matrix
    :param k: int, number of samples to select
    :param optimality: str, optimality type, one of ["A", "C", "D", "V"]
    :return: (indexes of the selected subset, optimality value for it)
    """
    optimality_func = _get_optimalty(optimality)
    num_samples = X.shape[0]
    # The check is >=, so the message says "at least"; original used a
    # placeholder-free f-string claiming "greater than".
    assert num_samples >= k, (
        f'number of samples ({num_samples}) should be at least k ({k})')
    # Sampling weights: per-row L2 norm, normalized into a distribution.
    probs = np.linalg.norm(X, axis=1)
    probs = probs / np.sum(probs)
    selected_ixs = np.random.choice(num_samples, size=k, replace=False, p=probs)
    optimality_value = optimality_func(
        Sigma=subset_covariance(X[selected_ixs]), A=A, X=X)
    return selected_ixs, optimality_value
def select_bayesian_dpp(X: np.ndarray, A: np.ndarray, k: int,
                        optimality: str = 'A', with_sdp: bool = False) -> \
        Tuple[np.ndarray, float]:
    """Select a subset of samples via a (Bayesian) determinantal point process.

    The returned subset is the union of one exact DPP draw and independent
    Bernoulli(p_i) draws, so its size is random and equals ``k`` only in
    expectation.

    :param X: np.ndarray, matrix of features (num_samples x num_features)
    :param A: np.ndarray, prior precision matrix
    :param k: int, target (expected) number of samples to select
    :param optimality: str, optimality type, one of ["A", "C", "D", "V"]
    :param with_sdp: bool, if True obtain per-sample inclusion probabilities
        by solving an SDP; otherwise use the uniform value k / num_samples
    :return: (indexes of the selected subset, optimality value for it)
    """
    optimality_func = _get_optimalty(optimality)
    num_samples = X.shape[0]
    assert num_samples >= k, (
        f'number of samples ({num_samples}) should be at least k ({k})')
    if with_sdp:
        p = sdp(X, A, k, optimality_func)
    else:
        p = k / num_samples * np.ones(num_samples)
    # DPP kernel B B^T with B = diag(sqrt(p)) X Z^{-1/2},
    # where Z = A + X^T diag(p) X.
    Z = A + (X.T * p) @ X
    B = (np.sqrt(p[:, None]) * X) @ frac_power(Z, -0.5)
    DPP = dpp(B)
    DPP.sample_exact()
    all_n = np.arange(num_samples)
    # Independent Bernoulli(p_i) draws, merged with the DPP sample.
    bernoulli_ixs = set(all_n[np.random.uniform(size=num_samples) < p])
    selected = set(DPP.list_of_samples[0]) | bernoulli_ixs
    selected_ixs = np.array(list(selected))
    optimality_value = optimality_func(
        Sigma=subset_covariance(X[selected_ixs]), A=A, X=X)
    return selected_ixs, optimality_value
def select_bottom_up(X: np.ndarray, A: np.ndarray, k: int,
                     optimality: str = 'A') -> Tuple[np.ndarray, float]:
    """Greedy forward selection of ``k`` samples.

    Starting from an empty subset, repeatedly add the single sample that
    minimizes the chosen optimality criterion on the extended subset.

    :param X: np.ndarray, matrix of features (num_samples x num_features)
    :param A: np.ndarray, prior precision matrix
    :param k: int, number of samples to select
    :param optimality: str, optimality type, one of ["A", "C", "D", "V"]
    :return: (indexes of the selected subset, optimality for it;
        the score is None when k == 0)
    """
    num_samples = X.shape[0]
    assert num_samples >= k, (
        f'number of samples ({num_samples}) should be at least k ({k})')
    selected_ixs = []
    full_samples_set = set(range(num_samples))
    optimality_func = _get_optimalty(optimality)
    global_score = None
    for i in range(k):
        candidate_samples = list(full_samples_set - set(selected_ixs))
        # np.inf (lowercase): np.Inf was removed in NumPy 2.0.
        current_optimality = np.inf
        optimal_sample = None
        for candidate_sample in candidate_samples:
            candidate_ixs = np.append(
                selected_ixs, [candidate_sample]).astype(int)
            candidate_optimality = optimality_func(
                Sigma=subset_covariance(X[candidate_ixs]), A=A, X=X)
            if candidate_optimality < current_optimality:
                current_optimality = candidate_optimality
                optimal_sample = candidate_sample
        if optimal_sample is not None:
            selected_ixs.append(optimal_sample)
        if i == k - 1:
            # Score of the final (size-k) subset.
            global_score = current_optimality
    return np.array(selected_ixs), global_score
# Build one plottable curve per optimality type: the upper estimate of the
# bottom-up criterion as a function of subset size k.
to_plot = []
for optimality in opt_types:
    upper_estimation_bottom_up = []
    for ix, k in enumerate(tqdm(k_linspace)):
        assert num_samples >= k, f'number of samples should be greater than k'
        selected_ixs = []
        full_samples_set = set(list(range(num_samples)))
        candidate_samples = list(full_samples_set - set(selected_ixs))
        # NOTE(review): selected_ixs is always empty at this point, so each
        # candidate_ixs is a single-sample subset and only the Sigma computed
        # for the LAST candidate survives the loop below; that Sigma is what
        # feeds upper_estimation. Looks unintended — confirm against the
        # original experiment before changing.
        for candidate_sample in candidate_samples:
            candidate_ixs = np.append(
                selected_ixs, [candidate_sample]).astype(int)
            Sigma = subset_covariance(X[candidate_ixs])
        upper_estimation_bottom_up.append(
            upper_estimation(X=X, A=A, k=k, Sigma=Sigma, opt_type=optimality))
    plot_prop = {'label': optimality}
    object_to_plot = get_object_to_plot(
        X=k_linspace, Y=upper_estimation_bottom_up, plot_prop=plot_prop)
    to_plot.append(object_to_plot)