def compute_kernel_matrix(struc_b6, struc_btbr, func_b6, func_btbr,
                          kernel="linear", normalized=True, plot=True,
                          **kwds):
    """
    Computes the kernel matrix for all graphs (structural and functional)
    represented in the common space.

    The four inputs are stacked row-wise in the order ``struc_b6``,
    ``struc_btbr``, ``func_b6``, ``func_btbr`` and the kernel is evaluated
    between every pair of rows, so the rows/columns of the result follow
    that same order.

    Parameters:
    ----------
    struc_b6: array like
    struc_btbr: array like
    func_b6: array like
    func_btbr: array like
        Vector representations of the graphs. They are combined with
        ``np.vstack``, so they must all have the same number of columns.
    kernel: string
        Kernel measure. The kernels implemented in sklearn are allowed.
        Possible values are 'rbf', 'sigmoid', 'polynomial', 'poly',
        'linear', 'cosine'.
    normalized: boolean
        Whether to normalize the kernel values by
        k_normalized(a,b) = k(a,b)/np.sqrt(k(a,a)*k(b,b))
        Entries for which k(a,a)*k(b,b) is zero or negative come out as
        inf/NaN, exactly as the formula dictates; no special handling is
        applied.
    plot: boolean
        Whether to display the resulting matrix by calling
        ``plot_similarity_matrix`` on it.
    **kwds: optional keyword parameters
        Any further parameters are passed directly to the kernel function.

    Returns:
    ------
    k_mat: ndarray
        Kernel matrix (normalized if ``normalized`` is True).
    """
    vects = np.vstack((struc_b6, struc_btbr, func_b6, func_btbr))
    k_mat = skpw.pairwise_kernels(vects, vects, metric=kernel, **kwds)

    if normalized:
        # Vectorised form of k(a,b) / sqrt(k(a,a) * k(b,b)): the outer
        # product of the diagonal holds k(a,a)*k(b,b) for every pair.
        self_sim = np.diag(k_mat)
        k_norm = np.asarray(
            k_mat / np.sqrt(np.outer(self_sim, self_sim)), dtype=float)
        # Mirror the upper triangle into the lower one so the result is
        # exactly symmetric even if k_mat carries rounding asymmetries.
        lower = np.tril_indices_from(k_norm, k=-1)
        k_norm[lower] = k_norm.T[lower]
        k_mat = k_norm

    if plot:
        plot_similarity_matrix(k_mat)
    return k_mat
def MMD_single_modality(data_b6, data_btbr, modality='Structural', iterations=100000, plot=True):
    """
    Process the data with the following approach:
    Embedding + RBF_kernel + KTST

    The two groups are stacked row-wise (B6 first, then BTBR), an RBF kernel
    matrix is computed over all rows with gamma = 1 / sigma2, where sigma2 is
    the squared median of the pairwise Euclidean distance matrix, and the
    MMD^2_u statistic is compared against a null distribution obtained from
    ``compute_null_distribution`` (called with a fixed seed of 123).
    Progress and results are printed to standard output.

    Parameters:
    -----------
    data_b6: array like
        Vectors for the first group; ``len(data_b6)`` is used as its size.
    data_btbr: array like
        Vectors for the second group; ``len(data_btbr)`` is used as its size.
        Must be stackable with ``data_b6`` via ``np.vstack``.
    modality: string
        Label used only in the printed messages and in the plot legend.
    iterations: int
        Number of iterations passed to ``compute_null_distribution``; it
        also sets the p-value resolution (1.0/iterations).
    plot: boolean
        If True, shows the kernel matrix via ``plot_similarity_matrix`` and
        draws a histogram of the null distribution with the observed
        MMD^2_u and the p-value marked on it.

    Return:
    ----------
    MMD distance, null_distribution, p-value
    NOTE(review): no ``return`` statement is visible in this part of the
    file; confirm that the function actually returns these three values.
    """
    print 'Analyzing %s data' %(modality)

    # Concatenating the data (B6 rows first, BTBR rows second)
    vectors = np.vstack((data_b6, data_btbr))
    n_b6 = len(data_b6)
    n_btbr = len(data_btbr)

    # Kernel bandwidth: squared median over the full pairwise distance matrix
    sigma2 = np.median(pairwise_distances(vectors, metric='euclidean'))**2
    k_matrix = pairwise_kernels(vectors, metric='rbf', gamma=1.0/sigma2)

    if plot:
        plot_similarity_matrix(k_matrix)

    # Computing the MMD
    mmd2u = MMD2u(k_matrix, n_b6, n_btbr)
    print("MMD^2_u = %s" % mmd2u)

    # Computing the null-distribution
    # Disabled alternative: null distribution only on B6 mice
    # sigma2_b6 = np.median(pairwise_distances(vectors_cl1, metric='euclidean'))**2
    # k_matrix_b6 = pairwise_kernels(vectors_cl1, metric='rbf', gamma=1.0/sigma2_b6)
    # mmd2u_null = compute_null_distribution(k_matrix_b6, 5, 5, iterations, seed=123, verbose=False)
    mmd2u_null = compute_null_distribution(k_matrix, n_b6, n_btbr, iterations, seed=123, verbose=False)
    print np.max(mmd2u_null)

    # Computing the p-value: fraction of null values strictly greater than
    # the observed statistic, floored at 1/iterations so it is never zero.
    p_value = max(1.0/iterations, (mmd2u_null > mmd2u).sum() / float(iterations))
    print("p-value ~= %s \t (resolution : %s)" % (p_value, 1.0/iterations))
    print 'Number of stds from MMD^2_u to mean value of null distribution: %s' % ((mmd2u - np.mean(mmd2u_null))/np.std(mmd2u_null))

    if plot:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        # Histogram of the null distribution
        prob, bins, patches = plt.hist(mmd2u_null, bins=50, normed=True)
        # Star marker at the observed MMD^2_u, placed low on the y axis
        ax.plot(mmd2u, prob.max()/30, 'w*', markersize=15, markeredgecolor='k', markeredgewidth=2, label="$%s MMD^2_u = %s$" % (modality, mmd2u))
        # func_p_value = max(1.0/iterations, (functional_mmd[1] > functional_mmd[0]).sum() / float(iterations))
        ax.annotate('p-value: %s' %(p_value),
                    xy=(float(mmd2u), prob.max()/9.),
                    xycoords='data',
                    xytext=(-105, 30),
                    textcoords='offset points',
                    bbox=dict(boxstyle="round", fc="1."),
                    arrowprops=dict(arrowstyle="->", connectionstyle="angle,angleA=0,angleB=90,rad=10"),
                    )
        plt.xlabel('$MMD^2_u$')
        plt.ylabel('$p(MMD^2_u)$')
        plt.legend(numpoints=1)
        # plt.title('%s_DATA: $p$-value=%s' %(modality, p_value))

    # NOTE(review): indentation was reconstructed from a whitespace-collapsed
    # source; confirm this blank-line print belongs at function level and
    # not inside the `if plot:` block above.
    print ''