# Hold out the second half of the samples for testing.
# BUG FIX: `n / 2` is true division in Python 3 and yields a float, which
# raises TypeError when used as a slice index -- use floor division.
X_test = X[n // 2:]
Y_test = Y[n // 2:]

# Print the within-block feature correlation matrices (rounded for display).
print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

###############################################################################
# Canonical (symmetric) PLS
# Transform data
# ~~~~~~~~~~~~~~
# Fit a 2-component canonical PLS on the training split, then project both
# splits into the shared latent space.
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
pl.figure(figsize=(12, 8))
pl.subplot(221)
pl.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
pl.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
pl.xlabel("x scores")
pl.ylabel("y scores")
# Report the held-out correlation between X and Y scores on component 1.
pl.title("Comp. 1: X vs Y (test corr = %.2f)"
         % np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
pl.xticks(())
pl.yticks(())
pl.legend(loc="best")
# remove features that have NaNs, but keep track of them for the future
idx = np.isnan(X_meg).any(axis=0)  # mask of features with at least 1 NaN
X_meg = X_meg[:, ~idx]
# NOTE: converted from a Python 2 print statement to the print() function.
print('MEG Features left: %d/%d' % (X_meg.shape[1], len(idx)))

# Standardize both modalities feature-wise (zero mean, unit variance).
X_fmri = preprocessing.scale(X_fmri)
X_meg = preprocessing.scale(X_meg)
y = inatt  # presumably the behavioral target scores -- TODO confirm upstream

# FIX: sklearn.pls was deprecated in 0.14 and later removed; PLSCanonical
# now lives in sklearn.cross_decomposition.
from sklearn.cross_decomposition import PLSCanonical

# Fit canonical PLS between the MEG and fMRI blocks and project both
# modalities onto the shared latent components.
ncomps = 10
plsca = PLSCanonical(n_components=ncomps)
plsca.fit(X_meg, X_fmri)
X_mc, X_fc = plsca.transform(X_meg, X_fmri)

# Correlate each component's MEG and fMRI scores with y, collecting
# (r, p) pairs in the order: meg_r, meg_p, fmri_r, fmri_p per component.
res = []
print(seed, band)
for comp in range(ncomps):
    r, p = stats.pearsonr(X_mc[:, comp], y)
    res += [r, p]
    r, p = stats.pearsonr(X_fc[:, comp], y)
    res += [r, p]
# Label the row by seed name and band range, e.g. "seed_8-12".
all_results.append(['%s_%d-%d' % (seed, band[0], band[1])] + res)

# Build a header row matching the per-component column layout above and
# prepend it to the accumulated results table.
header = []
for d in range(ncomps):
    header += ['meg r%d' % d, 'meg_p%d' % d, 'fmri r%d' % d, 'fmri_p%d' % d]
header = ['data'] + header
all_results.insert(0, header)
# Hold out the second half of the samples for testing.
# BUG FIX: use floor division so the slice index stays an int under
# Python 3 (`n / 2` would be a float and raise TypeError); the result is
# unchanged for Python 2 ints.
X_test = X[n // 2:]
Y_test = Y[n // 2:]

# Print the within-block feature correlation matrices (rounded for display).
# Converted from Python 2 print statements; single-argument print(...) is
# valid in both Python 2 and Python 3.
print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

###############################################################################
# Canonical (symmetric) PLS
# Transform data
# ~~~~~~~~~~~~~~
# Fit a 2-component canonical PLS on the training split, then project both
# splits into the shared latent space.
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
pl.subplot(221)
pl.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
pl.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
pl.xlabel("x scores")
pl.ylabel("y scores")
# Report the held-out correlation between X and Y scores on component 1.
pl.title('Comp. 1: X vs Y (test corr = %.2f)'
         % np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
pl.legend()
pl.subplot(224)
def siPLS(X, y):
    """Fit a 2-component canonical PLS on (X, y) and return the X scores.

    Parameters
    ----------
    X : array-like, assumed shape (n_samples, n_features) -- TODO confirm
        Predictor block passed to ``PLSCanonical.fit``.
    y : array-like
        Response block passed to ``PLSCanonical.fit``.

    Returns
    -------
    X_r : ndarray
        The X scores: ``plsca.transform(X)`` with a single argument
        returns only the projection of X onto the latent components.
    """
    plsca = PLSCanonical(n_components=2)
    plsca.fit(X, y)
    X_r = plsca.transform(X)
    # FIX: converted the Python 2 `print X_r` statement to the print()
    # function, and return the scores instead of silently discarding them
    # (backward-compatible: callers that ignored the old None still work).
    print(X_r)
    return X_r