Ejemplo n.º 1
0
def apply_ktst(K, y, iterations=10000, subjects=False, verbose=True):
    """
    Compute MMD^2_u, its null distribution and the p-value of the
    kernel two-sample test.

    Parameters:
    ----------
    K: array-like
        Kernel matrix. The first m rows are assumed to belong to the
        class labelled 0 and the remaining n rows to the class labelled 1.
    y: array_like
        Class labels. Must contain exactly two distinct values; the class
        sizes are counted from the labels 0 and 1.
    iterations: int
        Number of permutations used to build the null distribution. It
        also fixes the p-value resolution (1 / iterations).
    subjects: bool
        If True, the permutations are generated by
        permutation_subjects_ktst(y); otherwise
        compute_null_distribution draws them itself.
    verbose: bool
        Verbosity

    Returns:
    -------
    mmd2u: float
        MMD^2_u value.
    mmd2u_null: array
        Null distribution of the MMD^2_u
    p_value: float
        p-value, never smaller than 1 / iterations.
    """
    # Accept any array_like (e.g. a plain list): boolean-mask comparisons
    # such as ``y == 0`` are only element-wise on NumPy arrays.
    y = np.asarray(y)
    assert len(np.unique(y)) == 2, 'KTST only works on binary problems'

    # Assuming that the first m rows of the kernel matrix are from one
    # class and the other n rows from the second class.
    m = int(np.count_nonzero(y == 0))
    n = int(np.count_nonzero(y == 1))
    mmd2u = MMD2u(K, m, n)
    if verbose:
        print("MMD^2_u = %s" % mmd2u)
        print("Computing the null distribution.")
    if subjects:
        perms = [permutation_subjects_ktst(y) for _ in range(iterations)]
        mmd2u_null = compute_null_distribution_given_permutations(
            K, m, n, perms, iterations)
    else:
        mmd2u_null = compute_null_distribution(K,
                                               m,
                                               n,
                                               iterations,
                                               verbose=verbose)

    # The estimate is floored at the resolution of the permutation test so
    # that a p-value of exactly zero is never reported.
    p_value = max(1.0 / iterations,
                  (mmd2u_null > mmd2u).sum() / float(iterations))
    if verbose:
        print("p-value ~= %s \t (resolution : %s)" %
              (p_value, 1.0 / iterations))

    return mmd2u, mmd2u_null, p_value
Ejemplo n.º 2
0
def apply_ktst(K, y, iterations=10000, subjects=False, verbose=True):
    """
    Compute MMD^2_u, its null distribution and the p-value of the
    kernel two-sample test.

    Parameters:
    ----------
    K: array-like
        Kernel matrix; its first m rows are assumed to come from the
        class labelled 0 and the following n rows from the class
        labelled 1.
    y: array_like
        Class labels with exactly two distinct values. Class sizes are
        obtained by counting the labels 0 and 1.
    iterations: int
        Number of permutations for the null distribution; 1 / iterations
        is the smallest p-value that can be reported.
    subjects: bool
        When True, permutations come from permutation_subjects_ktst(y);
        when False, compute_null_distribution generates them.
    verbose: bool
        Verbosity

    Returns:
    -------
    mmd2u: float
        MMD^2_u value.
    mmd2u_null: array
        Null distribution of the MMD^2_u
    p_value: float
        p-value, bounded below by 1 / iterations.
    """
    # Coerce to an ndarray so that list/tuple labels work: ``y == 0`` must
    # be an element-wise comparison for the class counts below.
    y = np.asarray(y)
    assert len(np.unique(y)) == 2, 'KTST only works on binary problems'

    # Assuming that the first m rows of the kernel matrix are from one
    # class and the other n rows from the second class.
    m = int(np.count_nonzero(y == 0))
    n = int(np.count_nonzero(y == 1))
    mmd2u = MMD2u(K, m, n)
    if verbose:
        print("MMD^2_u = %s" % mmd2u)
        print("Computing the null distribution.")
    if subjects:
        perms = [permutation_subjects_ktst(y) for _ in range(iterations)]
        mmd2u_null = compute_null_distribution_given_permutations(K, m, n,
                                                                  perms,
                                                                  iterations)
    else:
        mmd2u_null = compute_null_distribution(K, m, n, iterations,
                                               verbose=verbose)

    # Fraction of null values above the observed statistic, floored at the
    # test resolution so the result is never exactly zero.
    resolution = 1.0 / iterations
    p_value = max(resolution,
                  (mmd2u_null > mmd2u).sum() / float(iterations))
    if verbose:
        print("p-value ~= %s \t (resolution : %s)" % (p_value, resolution))

    return mmd2u, mmd2u_null, p_value
Ejemplo n.º 3
0
        # Stack the two samples row-wise and build the label vector:
        # nA zeros followed by nB ones (assumes A has nA rows and B has nB
        # rows -- TODO confirm against the code that creates them).
        X = np.vstack([A, B])
        y = np.concatenate([np.zeros(nA), np.ones(nB)])

        # Gaussian-type kernel on the pairwise Euclidean distances; the
        # scale sigma2 is the squared median of the distance matrix.
        distances = pairwise_distances(X, metric='euclidean')
        sigma2 = np.median(distances)**2.0
        K = np.exp(-distances * distances / sigma2)
        # K = X.dot(X.T)

        # Observed MMD^2_u for this split; stored at index r (presumably
        # the repetition counter of an enclosing loop -- not visible here).
        iterations = 10000
        mmd2u_unpermuted = MMD2u(K, nA, nB)
        print("mmd2u: %s" % mmd2u_unpermuted)
        mmd2us[r] = mmd2u_unpermuted

        # Null distribution of MMD^2_u over `iterations` draws, then the
        # p-value of the observed statistic against it.
        mmd2us_null = compute_null_distribution(K,
                                                nA,
                                                nB,
                                                iterations,
                                                random_state=rng_ktst)
        p_value_mmd2u = estimate_pvalue(mmd2u_unpermuted, mmd2us_null)
        print("mmd2u p-value: %s" % p_value_mmd2u)
        p_value_mmd2us[r] = p_value_mmd2u

        # Settings for the classifier-based evaluation that follows.
        # NOTE: `iterations` is rebound from 10000 to 1 here.
        scoring = 'accuracy'
        n_folds = 5
        iterations = 1
        # score_unpermuted = compute_svm_score_nestedCV(K, y, n_folds,
        #                                               scoring=scoring,
        #                                               random_state=rng_cv)

        rngs = [
            np.random.RandomState(rng_cv.randint(low=MIN_INT, high=MAX_INT))
Ejemplo n.º 4
0
        # Draw nB samples for the second group from a multivariate normal
        # with mean muB and covariance covB.
        B = rng_data.multivariate_normal(muB, covB, size=nB)

        # Stack the two samples row-wise and build the label vector:
        # nA zeros followed by nB ones (assumes A has nA rows -- A is
        # created outside this excerpt, TODO confirm).
        X = np.vstack([A, B])
        y = np.concatenate([np.zeros(nA), np.ones(nB)])

        # Gaussian-type kernel on the pairwise Euclidean distances; the
        # scale sigma2 is the squared median of the distance matrix.
        distances = pairwise_distances(X, metric='euclidean')
        sigma2 = np.median(distances) ** 2.0
        K = np.exp(- distances * distances / sigma2)
        # K = X.dot(X.T)

        # Observed MMD^2_u for this split; stored at index r (presumably
        # the repetition counter of an enclosing loop -- not visible here).
        iterations = 10000
        mmd2u_unpermuted = MMD2u(K, nA, nB)
        print("mmd2u: %s" % mmd2u_unpermuted)
        mmd2us[r] = mmd2u_unpermuted

        # Null distribution of MMD^2_u over `iterations` draws, then the
        # p-value of the observed statistic against it.
        mmd2us_null = compute_null_distribution(K, nA, nB, iterations,
                                                random_state=rng_ktst)
        p_value_mmd2u = estimate_pvalue(mmd2u_unpermuted, mmd2us_null)
        print("mmd2u p-value: %s" % p_value_mmd2u)
        p_value_mmd2us[r] = p_value_mmd2u

        # Settings for the classifier-based evaluation.
        # NOTE: `iterations` is rebound from 10000 to 1 here, so the
        # comprehension and the parallel call below each run once.
        scoring = 'accuracy'
        n_folds = 5
        iterations = 1
        # score_unpermuted = compute_svm_score_nestedCV(K, y, n_folds,
        #                                               scoring=scoring,
        #                                               random_state=rng_cv)

        # One independent RandomState per run, each seeded with an integer
        # drawn from rng_cv in [MIN_INT, MAX_INT).
        rngs = [np.random.RandomState(rng_cv.randint(low=MIN_INT, high=MAX_INT)) for i in range(iterations)]
        # Evaluate compute_svm_score_nestedCV once per run through
        # Parallel/delayed (presumably joblib; n_jobs=-1), then average.
        scores_unpermuted = Parallel(n_jobs=-1)(delayed(compute_svm_score_nestedCV)(K, y, n_folds, scoring, rngs[i], param_grid=svm_param_grid) for i in range(iterations))
        score_unpermuted = np.mean(scores_unpermuted)
        print("accuracy: %s" % score_unpermuted)
def compute_mmd_struc_func(k_mat,
                           struc_b6,
                           struc_btbr,
                           func_b6,
                           func_btbr,
                           iterations=100000):
    """
    Computes the mmd values for the structural and functional problems and plot
    them with the null distributions.

    The observed MMD^2_u of each problem is computed on its own diagonal
    sub-block of the kernel matrix, while a single null distribution is
    computed on the full kernel matrix with the class sizes of both
    problems added together. Both observed values are compared against
    that shared null distribution.
    NOTE(review): confirm that comparing per-problem statistics against
    the combined null distribution is the intended design.

    Parameters:
    ----------
    k_mat: ndarray
           Kernel matrix. The leading len(struc_b6) + len(struc_btbr)
           rows/columns are treated as the structural samples and the
           remaining ones as the functional samples.
    struc_b6: array like
           Structural vectors for B6 class
    struc_btbr: array like
           Structural vectors for BTBR class
    func_b6: array like
           Functional vectors for B6 class
    func_btbr: array like
           Functional vectors for BTBR class
    iterations: int
           Number of iterations used for the null distribution; the
           p-values are floored at 1 / iterations.
    """
    # Computing the number of samples belonging to structural data in order
    # to split the kernel matrix.
    l_struc = len(struc_b6) + len(struc_btbr)

    # Computing MMD values
    struc_mmd = MMD2u(k_mat[:l_struc][:, :l_struc], len(struc_b6),
                      len(struc_btbr))
    func_mmd = MMD2u(k_mat[l_struc:][:, l_struc:], len(func_b6),
                     len(func_btbr))
    # Function-call form of print: valid on Python 3 and, for a single
    # argument, prints the same text on Python 2.
    print("struc_mmd = %s, func_mmd = %s" % (struc_mmd, func_mmd))

    # Computing the null-distribution
    mmd2u_null_all = compute_null_distribution(
        k_mat,
        struc_b6.shape[0] + func_b6.shape[0],
        struc_btbr.shape[0] + func_btbr.shape[0],
        iterations,
        seed=123,
        verbose=False)
    # Computing the p-value
    struc_p_value = max(1.0 / iterations,
                        (mmd2u_null_all > struc_mmd).sum() / float(iterations))
    print("struc_p-value ~= %s \t (resolution : %s)" %
          (struc_p_value, 1.0 / iterations))
    func_p_value = max(1.0 / iterations,
                       (mmd2u_null_all > func_mmd).sum() / float(iterations))
    print("func_p-value ~= %s \t (resolution : %s)" %
          (func_p_value, 1.0 / iterations))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # `density=True` replaces the `normed=True` keyword, which is no longer
    # accepted by current Matplotlib releases.
    prob, bins, patches = plt.hist(mmd2u_null_all, bins=50, density=True)
    # Markers for the observed values are drawn slightly above the x axis
    # (1/30 of the tallest histogram bar).
    ax.plot(struc_mmd,
            prob.max() / 30,
            'w*',
            markersize=15,
            markeredgecolor='k',
            markeredgewidth=2,
            label="$Structural MMD^2_u = %s$" % struc_mmd)
    ax.plot(func_mmd,
            prob.max() / 30,
            'w^',
            markersize=15,
            markeredgecolor='k',
            markeredgewidth=2,
            label="$Functional MMD^2_u = %s$" % func_mmd)
    plt.xlabel('$MMD^2_u$')
    plt.ylabel('$p(MMD^2_u)$')
    plt.title('$MMD^2_u$: null-distribution and observed values')

    # Annotate each observed value with its p-value; the text boxes are
    # offset to the left (structural) and right (functional) of the arrows.
    ax.annotate(
        'p-value: %s' % (struc_p_value),
        xy=(float(struc_mmd), 4.),
        xycoords='data',
        xytext=(-105, 30),
        textcoords='offset points',
        bbox=dict(boxstyle="round", fc="1."),
        arrowprops=dict(arrowstyle="->",
                        connectionstyle="angle,angleA=0,angleB=90,rad=10"),
    )

    ax.annotate(
        'p-value: %s' % (func_p_value),
        xy=(float(func_mmd), 4.),
        xycoords='data',
        xytext=(10, 30),
        textcoords='offset points',
        bbox=dict(boxstyle="round", fc="1."),
        arrowprops=dict(arrowstyle="->",
                        connectionstyle="angle,angleA=0,angleB=90,rad=10"),
    )

    plt.legend(numpoints=1)