Esempio n. 1
0
 def test_normalize_kernel_matrix(self):
     """Run ``normalize_kernel_matrix`` on a small 3x2 integer array.

     NOTE(review): the returned value is not compared against anything, so
     this currently only checks that the call completes without raising.
     """
     rows = [[1, 2], [2, 3], [1, 3]]
     kernel_input = np.array(rows)
     normalized = normalize_kernel_matrix(kernel_input)
Esempio n. 2
0
def optimize_projections(
    *,
    output: str,
    repr_similarity_matrix,
    full_similarity_matrix,
    n_components: int,
    similarity_type: str,
    use_gpu: bool,
) -> None:
    """Compute and store KPCA projections for each kernel/hyper-parameter pair.

    The pairs come from the ``kernels`` mapping (kernel name -> iterable of
    hyper-parameters) and the kernel functions from ``KERNEL_TO_PROJECTION``;
    both are looked up outside this function. For every pair a sub-folder
    ``<output>/<kernel_name>_<hyperparam>`` is created and three files are
    written into it: ``alphas.p`` and ``lambdas.p`` (pickled) and
    ``kpca.npy`` (``np.save``).

    :param output: The output folder
    :param repr_similarity_matrix: A square matrix with dimensions
        |repr| x |repr|
    :param full_similarity_matrix: A rectangular matrix with dimensions
        |full| x |repr|
    :param n_components: How many eigenvector/eigenvalue pairs to keep; they
        are taken from the end of what ``eigh`` returns
    :param similarity_type: ``"ngram_intersec"`` selects the linear
        projection, which is called without kernel name or hyper-parameter
    :param use_gpu: For any other similarity type, choose
        ``project_words_gpu`` instead of ``project_similarity_matrix``
    :return: None; the results are only written to disk
    """
    for kernel_name, hyperparams in kernels.items():
        for hyperparam in hyperparams:
            # Looked up per pair so an unknown kernel only fails once one of
            # its hyper-parameters is actually processed.
            project_with_kernel = KERNEL_TO_PROJECTION[kernel_name]

            # Dedicated folder for this kernel/hyper-parameter combination.
            param_folder = os.path.join(output, f'{kernel_name}_{hyperparam}')
            os.makedirs(param_folder, exist_ok=True)

            secho(f"({kernel_name}/{hyperparam}) calculating normalized/symmetric kernel matrix")
            normalized_kernel = normalize_kernel_matrix(
                project_with_kernel(repr_similarity_matrix, hyperparam)
            )

            secho(f"({kernel_name}/{hyperparam}) solving eigenvector/eigenvalues problem")
            eigenvalues, eigenvectors = eigh(normalized_kernel)

            # Offsets 1..n_components, applied as negative indices so that
            # selection starts at the last eigenpair and walks backwards.
            ranks = range(1, n_components + 1)

            # Alphas: the selected eigenvectors, one per column.
            repr_alphas = np.column_stack(
                [eigenvectors[:, -rank] for rank in ranks]
            )
            _alphas_path = os.path.join(param_folder, "alphas.p")
            secho(f"({kernel_name}/{hyperparam}) outputting alphas to {_alphas_path}")
            with open(_alphas_path, "wb") as alphas_file:
                pickle.dump(repr_alphas, alphas_file)

            # Lambdas: the matching eigenvalues, in the same order.
            repr_lambdas = [eigenvalues[-rank] for rank in ranks]
            _lambdas_path = os.path.join(param_folder, "lambdas.p")
            secho(f"({kernel_name}/{hyperparam}) outputting lambdas to {_lambdas_path}")
            with open(_lambdas_path, 'wb') as lambdas_file:
                pickle.dump(repr_lambdas, lambdas_file)

            secho(f"({kernel_name}/{hyperparam}) projecting known vocabulary to KPCA embeddings")
            # Each alpha column is divided by its own lambda (broadcast).
            repr_projection_matrix = repr_alphas / repr_lambdas

            # Arguments common to all three projection back-ends.
            shared_args = dict(
                projection_matrix=repr_projection_matrix,
                similarity_matrix=full_similarity_matrix,
            )
            if similarity_type == "ngram_intersec":
                # There is no additional kernel function on top of the
                # similarity function, hence no kernel arguments.
                kpca_matrix = project_full_vocab_linear(**shared_args)
            else:
                project = project_words_gpu if use_gpu else project_similarity_matrix
                kpca_matrix = project(
                    **shared_args,
                    kernel_name=kernel_name,
                    hyperparam=hyperparam,
                )

            # Persist the KPCA matrix next to the alphas and lambdas.
            _kpca_path = os.path.join(param_folder, "kpca.npy")
            secho(f"({kernel_name}/{hyperparam}) outputting KPCA matrix to {_kpca_path}")
            np.save(_kpca_path, kpca_matrix)