Ejemplo n.º 1
0
def compute_kernel_matrix(
    struc_b6, struc_btbr, func_b6, func_btbr, kernel="linear", normalized=True, plot=True, **kwds
):
    """
    Computes the kernel matrix for all graphs (structural and functional)
    represented in the common space.

    The four inputs are stacked row-wise, in argument order, and the kernel
    is evaluated between every pair of rows of the stacked array.

    Parameters:
    ----------
    struc_b6: array like
    struc_btbr: array like
    func_b6: array like
    func_btbr: array like
    kernel: string
            Kernel measure. The kernels implemented in sklearn are allowed.
            Possible values are 'rbf', 'sigmoid', 'polynomial',
            'poly', 'linear', 'cosine'.
    normalized: boolean
                Whether to normalize the kernel values by
                k_normalized(a,b) = k(a,b)/np.sqrt(k(a,a)*k(b,b))
    plot: boolean
          Whether to display the resulting matrix with
          plot_similarity_matrix before returning it.
    **kwds: optional keyword parameters
            Any further parameters are passed directly to the kernel function.
    Returns:
    ------
    k_mat: ndarray
           Kernel matrix (normalized when `normalized` is True).
    """
    vects = np.vstack((struc_b6, struc_btbr, func_b6, func_btbr))
    k_mat = skpw.pairwise_kernels(vects, vects, metric=kernel, **kwds)

    if normalized:
        diag = np.diag(k_mat)
        # Take the square root of the *product* k(a,a)*k(b,b), not the product
        # of square roots, so two negative diagonal entries still yield a
        # real denominator.
        ratio = k_mat / np.sqrt(np.outer(diag, diag))
        # Only the upper triangle (including the diagonal) is trusted; it is
        # mirrored into the lower triangle so the result is exactly symmetric.
        k_mat = (np.triu(ratio) + np.triu(ratio, 1).T).astype(float, copy=False)

    if plot:
        plot_similarity_matrix(k_mat)

    return k_mat
def MMD_single_modality(data_b6, data_btbr, modality='Structural',
                             iterations=100000, plot=True):
    """
    Process the data with the following approach: Embedding +
    RBF_kernel + KTST

    The two groups are stacked row-wise, an RBF kernel matrix is computed
    with gamma = 1 / sigma^2 (sigma being the median of the pairwise
    Euclidean distance matrix), and the MMD^2_u statistic is compared
    against a null distribution obtained from compute_null_distribution.

    Parameters:
    -----------
    data_b6: array like
             Samples of the first group, one per row.
    data_btbr: array like
               Samples of the second group, one per row.
    modality: string
              Label used in the printed messages and in the plot legend.
    iterations: int
                Number of iterations requested for the null distribution;
                also the denominator (and resolution) of the p-value.
    plot: boolean
          Whether to show the kernel matrix and the histogram of the
          null distribution with the observed statistic marked on it.

    Return:
    ----------
        NOTE(review): the lines visible here contain no return statement,
        although this docstring previously listed
        "MMD distance, null_distribution, p-value". Confirm against the
        rest of the function before relying on a return value.
    """
    print('Analyzing %s data' % modality)

    # Concatenating the data
    vectors = np.vstack((data_b6, data_btbr))
    n_b6 = len(data_b6)
    n_btbr = len(data_btbr)

    # Kernel bandwidth from the median pairwise Euclidean distance.
    sigma2 = np.median(pairwise_distances(vectors, metric='euclidean'))**2
    k_matrix = pairwise_kernels(vectors, metric='rbf', gamma=1.0/sigma2)

    if plot:
        plot_similarity_matrix(k_matrix)

    # Computing the MMD
    mmd2u = MMD2u(k_matrix, n_b6, n_btbr)
    print("MMD^2_u = %s" % mmd2u)

    # Computing the null-distribution over both groups, with a fixed seed
    # so repeated runs give the same distribution.
    mmd2u_null = compute_null_distribution(k_matrix, n_b6, n_btbr, iterations,
                                           seed=123, verbose=False)

    print(np.max(mmd2u_null))

    # Computing the p-value; it is floored at the resolution 1/iterations
    # so it is never reported as exactly zero.
    p_value = max(1.0/iterations, (mmd2u_null > mmd2u).sum() / float(iterations))
    print("p-value ~= %s \t (resolution : %s)" % (p_value, 1.0/iterations))
    print('Number of stds from MMD^2_u to mean value of null distribution: %s'
          % ((mmd2u - np.mean(mmd2u_null))/np.std(mmd2u_null)))

    if plot:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        # `density=True` replaces the `normed=True` keyword, which newer
        # Matplotlib releases no longer accept.
        prob, _, _ = plt.hist(mmd2u_null, bins=50, density=True)
        ax.plot(mmd2u, prob.max()/30, 'w*', markersize=15,
                markeredgecolor='k', markeredgewidth=2,
                label="$%s MMD^2_u = %s$" % (modality, mmd2u))

        ax.annotate('p-value: %s' %(p_value),
                    xy=(float(mmd2u), prob.max()/9.),  xycoords='data',
                    xytext=(-105, 30), textcoords='offset points',
                    bbox=dict(boxstyle="round", fc="1."),
                    arrowprops=dict(arrowstyle="->",
                                    connectionstyle="angle,angleA=0,angleB=90,rad=10"),
                    )
        plt.xlabel('$MMD^2_u$')
        plt.ylabel('$p(MMD^2_u)$')
        plt.legend(numpoints=1)
        print('')