# Split the samples into train/test halves.
# NOTE: integer division (//) is required here -- in Python 3 ``n / 2``
# is a float, and floats are not valid slice indices (TypeError).
X_train = X[: n // 2]
Y_train = Y[: n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]

print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

###############################################################################
# Canonical (symmetric) PLS

# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
pl.figure(figsize=(12, 8))
pl.subplot(221)
pl.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
pl.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
pl.xlabel("x scores")
pl.ylabel("y scores")
pl.title("Comp. 1: X vs Y (test corr = %.2f)"
         % np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
pl.xticks(())
# Split the samples into train/test halves.
# Ported to Python 3: print statements became print() calls, and the
# slice bounds use // so the indices stay integers (``n / 2`` is a
# float in Python 3 and is not a valid slice index).
X_train = X[:n // 2]
Y_train = Y[:n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]

print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

###############################################################################
# Canonical (symmetric) PLS

# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
pl.subplot(221)
pl.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
pl.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
pl.xlabel("x scores")
pl.ylabel("y scores")
pl.title('Comp. 1: X vs Y (test corr = %.2f)'
         % np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
pl.legend()
def siPLS(X, y):
    """Fit a 2-component canonical PLS on (X, y) and print the X scores.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor block.
    y : array-like
        Response block, same number of samples as ``X``.

    Returns
    -------
    X_r : ndarray
        The X scores from the fitted model.  (The original returned
        ``None``; returning the scores is backward compatible and makes
        the result reusable.)
    """
    plsca = PLSCanonical(n_components=2)
    plsca.fit(X, y)
    X_r = plsca.transform(X)
    # Python 3 print function (original used the Python 2 print statement).
    print(X_r)
    return X_r
# Keep only the subjects not flagged by ``idx`` (mask defined upstream --
# TODO confirm its meaning against the caller).
inatt = inatt0[~idx]
hi = hi0[~idx]

# remove features that have NaNs, but keep track of them for the future
idx = np.isnan(X_meg).any(axis=0)  # mask of features with at least 1 NaN
X_meg = X_meg[:, ~idx]
print('MEG Features left: %d/%d' % (X_meg.shape[1], len(idx)))

# Standardize both blocks before PLS (zero mean, unit variance per feature).
X_fmri = preprocessing.scale(X_fmri)
X_meg = preprocessing.scale(X_meg)
y = inatt

# NOTE: ``sklearn.pls`` was removed from scikit-learn; the class now
# lives in ``sklearn.cross_decomposition``.
from sklearn.cross_decomposition import PLSCanonical

ncomps = 10
plsca = PLSCanonical(n_components=ncomps)
plsca.fit(X_meg, X_fmri)
X_mc, X_fc = plsca.transform(X_meg, X_fmri)

# Correlate every canonical component (MEG and fMRI side) with the
# behavioral score, collecting (r, p) pairs per component.
res = []
print(seed, band)
for comp in range(ncomps):
    r, p = stats.pearsonr(X_mc[:, comp], y)
    res += [r, p]
    r, p = stats.pearsonr(X_fc[:, comp], y)
    res += [r, p]
all_results.append(['%s_%d-%d' % (seed, band[0], band[1])] + res)

# Column header matching the per-component (r, p) layout built above.
header = []
for d in range(ncomps):
    header += ['meg r%d' % d, 'meg_p%d' % d, 'fmri r%d' % d, 'fmri_p%d' % d]
header = ['data'] + header
def test_predictions():
    """Compare the project's PLS implementations against scikit-learn.

    Fits scikit-learn's PLSRegression / PLSCanonical and the project's
    PLSR / PLSC / O2PLS on the Linnerud data, then checks that the
    predictions and residual sums of squares agree (and that O2PLS
    fits the training data at least as tightly as PLSCanonical).
    """
    dataset = load_linnerud()
    X = dataset.data
    Y = dataset.target

    tol = 5e-12
    miter = 1000
    num_comp = 2
    scale = False

    Xorig = X.copy()
    Yorig = Y.copy()

    # Reference: scikit-learn PLS regression.
    sk_plsr = PLSRegression(n_components=num_comp, scale=scale,
                            tol=tol, max_iter=miter, copy=True)
    sk_plsr.fit(Xorig, Yorig)
    Yhat_sk_plsr = sk_plsr.predict(Xorig)
    ssy_sk_plsr = np.sum((Yorig - Yhat_sk_plsr) ** 2)

    # Project PLSR must reproduce the scikit-learn predictions.
    my_plsr = PLSR(num_comp=num_comp, center=True, scale=scale,
                   tolerance=tol, max_iter=miter)
    my_plsr.fit(X, Y)
    Yhat_my_plsr = my_plsr.predict(X)
    assert_array_almost_equal(Yhat_sk_plsr, Yhat_my_plsr, decimal=5,
                              err_msg="PLSR gives wrong prediction")
    ssy_my_plsr = np.sum((Yorig - Yhat_my_plsr) ** 2)
    assert abs(ssy_sk_plsr - ssy_my_plsr) < 0.00005

    # Reference: scikit-learn canonical PLS.
    sk_plsc = PLSCanonical(n_components=num_comp, scale=scale,
                           tol=tol, max_iter=miter, copy=True)
    sk_plsc.fit(Xorig, Yorig)
    Yhat_sk_plsc = sk_plsc.predict(Xorig)
    ssy_sk_plsc = np.sum((Yorig - Yhat_sk_plsc) ** 2)

    # Project PLSC against scikit-learn's canonical PLS.
    my_plsc = PLSC(num_comp=num_comp, center=True, scale=scale,
                   tolerance=tol, max_iter=miter)
    my_plsc.fit(X, Y)
    Yhat_my_plsc = my_plsc.predict(X)
    ssy_my_plsc = np.sum((Yorig - Yhat_my_plsc) ** 2)

    # O2PLS with one orthogonal X-component and no orthogonal Y-component.
    my_o2pls = O2PLS(num_comp=[num_comp, 1, 0], center=True, scale=scale,
                     tolerance=tol, max_iter=miter)
    my_o2pls.fit(X, Y)
    Yhat_o2pls = my_o2pls.predict(X)
    ssy_o2pls = np.sum((Yorig - Yhat_o2pls) ** 2)

    assert abs(ssy_sk_plsc - ssy_my_plsc) < 0.00005
    # O2PLS removes orthogonal variation, so its training residual is smaller.
    assert ssy_sk_plsc > ssy_o2pls