コード例 #1
0
def select_predictive_length(
        X: np.ndarray,
        A: np.ndarray,
        k: int,
        optimality: str = 'A') -> Tuple[np.ndarray, float]:
    """Randomly select ``k`` samples with probability proportional to
    each sample's Euclidean norm, then score the subset.

    :param X: np.ndarray, matrix of features (rows are samples)
    :param A: np.ndarray, prior precision matrix
    :param k: int, number of samples to select
    :param optimality: str, optimality type, one of ["A", "C", "D", "V"]
    :return: indexes of samples subset, optimality value for them
    """
    optimality_func = _get_optimalty(optimality)
    num_samples = X.shape[0]
    assert num_samples >= k, \
        'number of samples should be greater than or equal to k'
    # Sampling weight of a row is its L2 norm, normalized to a distribution.
    probs = np.linalg.norm(X, axis=1)
    probs = probs / np.sum(probs)
    selected_ixs = np.random.choice(num_samples,
                                    size=k,
                                    replace=False,
                                    p=probs)
    optimality_value = optimality_func(
        Sigma=subset_covariance(X[selected_ixs]),
        A=A,
        X=X)
    return selected_ixs, optimality_value
コード例 #2
0
def select_bayesian_dpp(X: np.ndarray,
                        A: np.ndarray,
                        k: int,
                        optimality: str = 'A',
                        with_sdp: bool = False) -> \
        Tuple[np.ndarray, float]:
    """Select a subset of samples with a Bayesian determinantal point
    process (DPP) built from inclusion probabilities ``p``, then score it.

    :param X: np.ndarray, matrix of features (rows are samples)
    :param A: np.ndarray, prior precision matrix
    :param k: int, number of samples to select (expected subset size)
    :param optimality: str, optimality type, one of ["A", "C", "D", "V"]
    :param with_sdp: bool, if True obtain inclusion probabilities from an
        SDP relaxation; otherwise use the uniform value k / num_samples
    :return: indexes of samples subset, optimality value for them
    """
    optimality_func = _get_optimalty(optimality)
    num_samples = X.shape[0]
    assert num_samples >= k, \
        'number of samples should be greater than or equal to k'
    if with_sdp:
        p = sdp(X, A, k, optimality_func)
    else:
        p = k / num_samples * np.ones(num_samples)
    # Kernel of the DPP: B B^T with Z = A + X^T diag(p) X.
    Z = A + (X.T * p) @ X
    B = (np.sqrt(p[:, None]) * X) @ frac_power(Z, -0.5)
    DPP = dpp(B)
    DPP.sample_exact()
    # Augment the DPP draw with independent Bernoulli(p) inclusions.
    all_n = np.arange(num_samples)
    bernoulli_picks = set(all_n[np.random.uniform(size=num_samples) < p])
    selected_ixs = set(DPP.list_of_samples[0]) | bernoulli_picks
    selected_ixs = np.array(list(selected_ixs))
    optimality_value = optimality_func(
        Sigma=subset_covariance(X[selected_ixs]), A=A, X=X)
    return selected_ixs, optimality_value
コード例 #3
0
def select_bottom_up(X: np.ndarray,
                     A: np.ndarray,
                     k: int,
                     optimality: str = 'A') -> Tuple[np.ndarray, float]:
    """Greedy bottom-up subset selection: at each of ``k`` steps add the
    sample that minimizes the optimality criterion of the current subset.

    :param X: np.ndarray, matrix of features (rows are samples)
    :param A: np.ndarray, prior precision matrix
    :param k: int, number of samples to select
    :param optimality: str, optimality type, one of ["A", "C", "D", "V"]
    :return: indexes of samples subset, optimality for them
        (score is None when k == 0)
    """
    num_samples = X.shape[0]
    assert num_samples >= k, \
        'number of samples should be greater than or equal to k'
    optimality_func = _get_optimalty(optimality)
    selected_ixs = []
    full_samples_set = set(range(num_samples))
    global_score = None
    for step in range(k):
        candidate_samples = full_samples_set - set(selected_ixs)
        # np.inf (not the removed np.Inf alias) so any finite score wins.
        best_score = np.inf
        best_sample = None
        for candidate_sample in candidate_samples:
            candidate_ixs = np.append(selected_ixs,
                                      [candidate_sample]).astype(int)
            candidate_score = optimality_func(
                Sigma=subset_covariance(X[candidate_ixs]),
                A=A,
                X=X)
            if candidate_score < best_score:
                best_score = candidate_score
                best_sample = candidate_sample
        if best_sample is not None:
            selected_ixs.append(best_sample)
        if step == k - 1:
            # Score of the final, full-size subset.
            global_score = best_score

    return np.array(selected_ixs), global_score
コード例 #4
0
            # Build, for every optimality type, the curve of upper estimations
            # over the grid of subset sizes k_linspace, and collect one plot
            # object per optimality type.
            to_plot = []
            for optimality in opt_types:
                upper_estimation_bottom_up = []
                for ix in tqdm(range(len(k_linspace))):
                    k = k_linspace[ix]
                    # Sigma
                    assert num_samples >= k, f'number of samples should be greater than k'
                    selected_ixs = []
                    full_samples_set = set(list(range(num_samples)))
                    # selected_ixs is always empty here, so candidate_samples
                    # is simply all sample indexes.
                    candidate_samples = list(full_samples_set -
                                             set(selected_ixs))

                    # NOTE(review): this loop overwrites candidate_ixs each
                    # iteration, so only the LAST candidate survives and Sigma
                    # below is the covariance of a single sample. Looks like a
                    # truncated copy of the bottom-up selection (no scoring /
                    # accumulation step) — confirm whether a greedy selection
                    # was intended here.
                    for candidate_sample in candidate_samples:
                        candidate_ixs = np.append(
                            selected_ixs, [candidate_sample]).astype(int)
                    Sigma = subset_covariance(X[candidate_ixs])

                    # Upper bound on the criterion for subset size k under
                    # the current optimality type.
                    upper_estimation_bottom_up.append(
                        upper_estimation(X=X,
                                         A=A,
                                         k=k,
                                         Sigma=Sigma,
                                         opt_type=optimality))

                plot_prop = {'label': optimality}

                object_to_plot = get_object_to_plot(
                    X=k_linspace,
                    Y=upper_estimation_bottom_up,
                    plot_prop=plot_prop)
                to_plot.append(object_to_plot)