Example #1
0
def test_pls():
    n_components = 2
    # 1) Canonical (symetric) PLS (PLS 2 blocks canonical mode A)
    # ===========================================================
    # Compare 2 algo.: nipals vs. svd
    # ------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=n_components)
    pls_bynipals.fit(X, Y)
    pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=n_components)
    pls_bysvd.fit(X, Y)
    # check that the loading vectors are highly correlated
    assert_array_almost_equal(
        [
            np.abs(np.corrcoef(pls_bynipals.x_loadings_[:, k], pls_bysvd.x_loadings_[:, k])[1, 0])
            for k in xrange(n_components)
        ],
        np.ones(n_components),
        err_msg="nipals and svd implementation lead to different x loadings",
    )

    assert_array_almost_equal(
        [
            np.abs(np.corrcoef(pls_bynipals.y_loadings_[:, k], pls_bysvd.y_loadings_[:, k])[1, 0])
            for k in xrange(n_components)
        ],
        np.ones(n_components),
        err_msg="nipals and svd implementation lead to different y loadings",
    )

    # Check PLS properties (with n_components=X.shape[1])
    # ---------------------------------------------------
    plsca = pls.PLSCanonical(n_components=X.shape[1])
    plsca.fit(X, Y)
    T = plsca.x_scores_
    P = plsca.x_loadings_
    Wx = plsca.x_weights_
    U = plsca.y_scores_
    Q = plsca.y_loadings_
    Wy = plsca.y_weights_

    def check_ortho(M, err_msg):
        K = np.dot(M.T, M)
        assert_array_almost_equal(K, np.diag(np.diag(K)), err_msg=err_msg)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(Wx, "x weights are not orthogonal")
    check_ortho(Wy, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(T, "x scores are not orthogonal")
    check_ortho(U, "y scores are not orthogonal")

    # Check X = TP' and Y = UQ' (with (p == q) components)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # center scale X, Y
    Xc, Yc, x_mean, y_mean, x_std, y_std = pls._center_scale_xy(X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T), err_msg="X != TP'")
    assert_array_almost_equal(Yc, np.dot(U, Q.T), err_msg="Y != UQ'")

    # Check that rotations on training data lead to scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Xr = plsca.transform(X)
    assert_array_almost_equal(Xr, plsca.x_scores_, err_msg="rotation on X failed")
    Xr, Yr = plsca.transform(X, Y)
    assert_array_almost_equal(Xr, plsca.x_scores_, err_msg="rotation on X failed")
    assert_array_almost_equal(Yr, plsca.y_scores_, err_msg="rotation on Y failed")

    # "Non regression test" on canonical PLS
    # --------------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=n_components)
    pls_bynipals.fit(X, Y)

    pls_ca = pls_bynipals
    x_loadings = np.array([[-0.61470416, 0.37877695], [-0.65625755, 0.01196893], [-0.51733059, -0.93984954]])
    assert_array_almost_equal(pls_ca.x_loadings_, x_loadings)

    y_loadings = np.array([[0.66591533, 0.77358148], [0.67602364, -0.62871191], [-0.35892128, -0.11981924]])
    assert_array_almost_equal(pls_ca.y_loadings_, y_loadings)

    x_weights = np.array([[-0.61330704, 0.25616119], [-0.74697144, 0.11930791], [-0.25668686, -0.95924297]])
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    y_weights = np.array([[0.58989127, 0.7890047], [0.77134053, -0.61351791], [-0.2388767, -0.03267062]])
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    # 2) Regression PLS (PLS2): "Non regression test"
    # ===============================================
    pls2 = pls.PLSRegression(n_components=n_components)
    pls2.fit(X, Y)

    x_loadings = np.array([[-0.61470416, -0.24574278], [-0.65625755, -0.14396183], [-0.51733059, 1.00609417]])
    assert_array_almost_equal(pls2.x_loadings_, x_loadings)

    y_loadings = np.array([[0.32456184, 0.29892183], [0.42439636, 0.61970543], [-0.13143144, -0.26348971]])
    assert_array_almost_equal(pls2.y_loadings_, y_loadings)

    x_weights = np.array([[-0.61330704, -0.00443647], [-0.74697144, -0.32172099], [-0.25668686, 0.94682413]])
    assert_array_almost_equal(pls2.x_weights_, x_weights)

    y_weights = np.array([[0.58989127, 0.40572461], [0.77134053, 0.84112205], [-0.2388767, -0.35763282]])
    assert_array_almost_equal(pls2.y_weights_, y_weights)

    ypred_2 = np.array([[180.33278555, 35.57034871, 56.06817703], [192.06235219, 37.95306771, 54.12925192]])

    assert_array_almost_equal(pls2.predict(X[:2]), ypred_2)
Example #2
0
def test_pls():
    d = load_linnerud()
    X = d.data
    Y = d.target
    # 1) Canonical (symetric) PLS (PLS 2 blocks canonical mode A)
    # ===========================================================
    # Compare 2 algo.: nipals vs. svd
    # ------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=X.shape[1])
    pls_bynipals.fit(X, Y)
    pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=X.shape[1])
    pls_bysvd.fit(X, Y)
    # check equalities of loading (up to the sign of the second column)
    assert_array_almost_equal(
        pls_bynipals.x_loadings_,
        np.multiply(pls_bysvd.x_loadings_, np.array([1, -1, 1])),
        decimal=5,
        err_msg="nipals and svd implementation lead to different x loadings")

    assert_array_almost_equal(
        pls_bynipals.y_loadings_,
        np.multiply(pls_bysvd.y_loadings_, np.array([1, -1, 1])),
        decimal=5,
        err_msg="nipals and svd implementation lead to different y loadings")

    # Check PLS properties (with n_components=X.shape[1])
    # ---------------------------------------------------
    plsca = pls.PLSCanonical(n_components=X.shape[1])
    plsca.fit(X, Y)
    T = plsca.x_scores_
    P = plsca.x_loadings_
    Wx = plsca.x_weights_
    U = plsca.y_scores_
    Q = plsca.y_loadings_
    Wy = plsca.y_weights_

    def check_ortho(M, err_msg):
        K = np.dot(M.T, M)
        assert_array_almost_equal(K, np.diag(np.diag(K)), err_msg=err_msg)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(Wx, "x weights are not orthogonal")
    check_ortho(Wy, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(T, "x scores are not orthogonal")
    check_ortho(U, "y scores are not orthogonal")

    # Check X = TP' and Y = UQ' (with (p == q) components)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # center scale X, Y
    Xc, Yc, x_mean, y_mean, x_std, y_std =\
         pls._center_scale_xy(X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T), err_msg="X != TP'")
    assert_array_almost_equal(Yc, np.dot(U, Q.T), err_msg="Y != UQ'")

    # Check that rotations on training data lead to scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Xr = plsca.transform(X)
    assert_array_almost_equal(Xr,
                              plsca.x_scores_,
                              err_msg="rotation on X failed")
    Xr, Yr = plsca.transform(X, Y)
    assert_array_almost_equal(Xr,
                              plsca.x_scores_,
                              err_msg="rotation on X failed")
    assert_array_almost_equal(Yr,
                              plsca.y_scores_,
                              err_msg="rotation on Y failed")

    # "Non regression test" on canonical PLS
    # --------------------------------------
    # The results were checked against the R-package plspm
    pls_ca = pls.PLSCanonical(n_components=X.shape[1])
    pls_ca.fit(X, Y)

    x_weights = np.array([[-0.61330704, 0.25616119, -0.74715187],
                          [-0.74697144, 0.11930791, 0.65406368],
                          [-0.25668686, -0.95924297, -0.11817271]])
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    x_rotations = np.array([[-0.61330704, 0.41591889, -0.62297525],
                            [-0.74697144, 0.31388326, 0.77368233],
                            [-0.25668686, -0.89237972, -0.24121788]])
    assert_array_almost_equal(pls_ca.x_rotations_, x_rotations)

    y_weights = np.array([[+0.58989127, 0.7890047, 0.1717553],
                          [+0.77134053, -0.61351791, 0.16920272],
                          [-0.23887670, -0.03267062, 0.97050016]])
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    y_rotations = np.array([[+0.58989127, 0.7168115, 0.30665872],
                            [+0.77134053, -0.70791757, 0.19786539],
                            [-0.23887670, -0.00343595, 0.94162826]])
    assert_array_almost_equal(pls_ca.y_rotations_, y_rotations)

    # 2) Regression PLS (PLS2): "Non regression test"
    # ===============================================
    # The results were checked against the R-packages plspm, misOmics and pls
    pls_2 = pls.PLSRegression(n_components=X.shape[1])
    pls_2.fit(X, Y)

    x_weights = np.array([[-0.61330704, -0.00443647, 0.78983213],
                          [-0.74697144, -0.32172099, -0.58183269],
                          [-0.25668686, 0.94682413, -0.19399983]])
    assert_array_almost_equal(pls_2.x_weights_, x_weights)

    x_loadings = np.array([[-0.61470416, -0.24574278, 0.78983213],
                           [-0.65625755, -0.14396183, -0.58183269],
                           [-0.51733059, 1.00609417, -0.19399983]])
    assert_array_almost_equal(pls_2.x_loadings_, x_loadings)

    y_weights = np.array([[+0.32456184, 0.29892183, 0.20316322],
                          [+0.42439636, 0.61970543, 0.19320542],
                          [-0.13143144, -0.26348971, -0.17092916]])
    assert_array_almost_equal(pls_2.y_weights_, y_weights)

    y_loadings = np.array([[+0.32456184, 0.29892183, 0.20316322],
                           [+0.42439636, 0.61970543, 0.19320542],
                           [-0.13143144, -0.26348971, -0.17092916]])
    assert_array_almost_equal(pls_2.y_loadings_, y_loadings)

    # 3) Another non-regression test of Canonical PLS on random dataset
    # =================================================================
    # The results were checked against the R-package plspm
    n = 500
    p_noise = 10
    q_noise = 5
    # 2 latents vars:
    np.random.seed(11)
    l1 = np.random.normal(size=n)
    l2 = np.random.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    X = latents + np.random.normal(size=4 * n).reshape((n, 4))
    Y = latents + np.random.normal(size=4 * n).reshape((n, 4))
    X = np.concatenate(
        (X, np.random.normal(size=p_noise * n).reshape(n, p_noise)), axis=1)
    Y = np.concatenate(
        (Y, np.random.normal(size=q_noise * n).reshape(n, q_noise)), axis=1)
    np.random.seed(None)
    pls_ca = pls.PLSCanonical(n_components=3)
    pls_ca.fit(X, Y)

    x_weights = np.array([[0.65803719, 0.19197924, 0.21769083],
                          [0.7009113, 0.13303969, -0.15376699],
                          [0.13528197, -0.68636408, 0.13856546],
                          [0.16854574, -0.66788088, -0.12485304],
                          [-0.03232333, -0.04189855, 0.40690153],
                          [0.1148816, -0.09643158, 0.1613305],
                          [0.04792138, -0.02384992, 0.17175319],
                          [-0.06781, -0.01666137, -0.18556747],
                          [-0.00266945, -0.00160224, 0.11893098],
                          [-0.00849528, -0.07706095, 0.1570547],
                          [-0.00949471, -0.02964127, 0.34657036],
                          [-0.03572177, 0.0945091, 0.3414855],
                          [0.05584937, -0.02028961, -0.57682568],
                          [0.05744254, -0.01482333, -0.17431274]])
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    x_loadings = np.array([[0.65649254, 0.1847647, 0.15270699],
                           [0.67554234, 0.15237508, -0.09182247],
                           [0.19219925, -0.67750975, 0.08673128],
                           [0.2133631, -0.67034809, -0.08835483],
                           [-0.03178912, -0.06668336, 0.43395268],
                           [0.15684588, -0.13350241, 0.20578984],
                           [0.03337736, -0.03807306, 0.09871553],
                           [-0.06199844, 0.01559854, -0.1881785],
                           [0.00406146, -0.00587025, 0.16413253],
                           [-0.00374239, -0.05848466, 0.19140336],
                           [0.00139214, -0.01033161, 0.32239136],
                           [-0.05292828, 0.0953533, 0.31916881],
                           [0.04031924, -0.01961045, -0.65174036],
                           [0.06172484, -0.06597366, -0.1244497]])
    assert_array_almost_equal(pls_ca.x_loadings_, x_loadings)

    y_weights = np.array([[0.66101097, 0.18672553, 0.22826092],
                          [0.69347861, 0.18463471, -0.23995597],
                          [0.14462724, -0.66504085, 0.17082434],
                          [0.22247955, -0.6932605, -0.09832993],
                          [0.07035859, 0.00714283, 0.67810124],
                          [0.07765351, -0.0105204, -0.44108074],
                          [-0.00917056, 0.04322147, 0.10062478],
                          [-0.01909512, 0.06182718, 0.28830475],
                          [0.01756709, 0.04797666, 0.32225745]])
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    y_loadings = np.array([[0.68568625, 0.1674376, 0.0969508],
                           [0.68782064, 0.20375837, -0.1164448],
                           [0.11712173, -0.68046903, 0.12001505],
                           [0.17860457, -0.6798319, -0.05089681],
                           [0.06265739, -0.0277703, 0.74729584],
                           [0.0914178, 0.00403751, -0.5135078],
                           [-0.02196918, -0.01377169, 0.09564505],
                           [-0.03288952, 0.09039729, 0.31858973],
                           [0.04287624, 0.05254676, 0.27836841]])
    assert_array_almost_equal(pls_ca.y_loadings_, y_loadings)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(pls_ca.x_weights_, "x weights are not orthogonal")
    check_ortho(pls_ca.y_weights_, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(pls_ca.x_scores_, "x scores are not orthogonal")
    check_ortho(pls_ca.y_scores_, "y scores are not orthogonal")
Example #3
0
def test_pls():
    n_components = 2
    # 1) Canonical (symetric) PLS (PLS 2 blocks canonical mode A)
    # ===========================================================
    # Compare 2 algo.: nipals vs. svd
    # ------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=n_components)
    pls_bynipals.fit(X, Y)
    pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=n_components)
    pls_bysvd.fit(X, Y)
    # check that the loading vectors are highly correlated
    assert_array_almost_equal(
        [
            np.abs(
                np.corrcoef(pls_bynipals.x_loadings_[:, k],
                            pls_bysvd.x_loadings_[:, k])[1, 0])
            for k in xrange(n_components)
        ],
        np.ones(n_components),
        err_msg="nipals and svd implementation lead to different x loadings")

    assert_array_almost_equal(
        [
            np.abs(
                np.corrcoef(pls_bynipals.y_loadings_[:, k],
                            pls_bysvd.y_loadings_[:, k])[1, 0])
            for k in xrange(n_components)
        ],
        np.ones(n_components),
        err_msg="nipals and svd implementation lead to different y loadings")

    # Check PLS properties (with n_components=X.shape[1])
    # ---------------------------------------------------
    plsca = pls.PLSCanonical(n_components=X.shape[1])
    plsca.fit(X, Y)
    T = plsca.x_scores_
    P = plsca.x_loadings_
    Wx = plsca.x_weights_
    U = plsca.y_scores_
    Q = plsca.y_loadings_
    Wy = plsca.y_weights_

    def check_ortho(M, err_msg):
        K = np.dot(M.T, M)
        assert_array_almost_equal(K, np.diag(np.diag(K)), err_msg=err_msg)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(Wx, "x weights are not orthogonal")
    check_ortho(Wy, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(T, "x scores are not orthogonal")
    check_ortho(U, "y scores are not orthogonal")

    # Check X = TP' and Y = UQ' (with (p == q) components)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # center scale X, Y
    Xc, Yc, x_mean, y_mean, x_std, y_std =\
         pls._center_scale_xy(X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T), err_msg="X != TP'")
    assert_array_almost_equal(Yc, np.dot(U, Q.T), err_msg="Y != UQ'")

    # Check that rotations on training data lead to scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Xr = plsca.transform(X)
    assert_array_almost_equal(Xr,
                              plsca.x_scores_,
                              err_msg="rotation on X failed")
    Xr, Yr = plsca.transform(X, Y)
    assert_array_almost_equal(Xr,
                              plsca.x_scores_,
                              err_msg="rotation on X failed")
    assert_array_almost_equal(Yr,
                              plsca.y_scores_,
                              err_msg="rotation on Y failed")

    # "Non regression test" on canonical PLS
    # --------------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=n_components)
    pls_bynipals.fit(X, Y)

    pls_ca = pls_bynipals
    x_loadings = np.array([[-0.61470416,
                            0.37877695], [-0.65625755, 0.01196893],
                           [-0.51733059, -0.93984954]])
    assert_array_almost_equal(pls_ca.x_loadings_, x_loadings)

    y_loadings = np.array([[0.66591533, 0.77358148], [0.67602364, -0.62871191],
                           [-0.35892128, -0.11981924]])
    assert_array_almost_equal(pls_ca.y_loadings_, y_loadings)

    x_weights = np.array([[-0.61330704, 0.25616119], [-0.74697144, 0.11930791],
                          [-0.25668686, -0.95924297]])
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    y_weights = np.array([[0.58989127, 0.7890047], [0.77134053, -0.61351791],
                          [-0.2388767, -0.03267062]])
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    # 2) Regression PLS (PLS2): "Non regression test"
    # ===============================================
    pls2 = pls.PLSRegression(n_components=n_components)
    pls2.fit(X, Y)

    x_loadings = np.array([[-0.61470416, -0.24574278],
                           [-0.65625755, -0.14396183],
                           [-0.51733059, 1.00609417]])
    assert_array_almost_equal(pls2.x_loadings_, x_loadings)

    y_loadings = np.array([[0.32456184, 0.29892183], [0.42439636, 0.61970543],
                           [-0.13143144, -0.26348971]])
    assert_array_almost_equal(pls2.y_loadings_, y_loadings)

    x_weights = np.array([[-0.61330704,
                           -0.00443647], [-0.74697144, -0.32172099],
                          [-0.25668686, 0.94682413]])
    assert_array_almost_equal(pls2.x_weights_, x_weights)

    y_weights = np.array([[0.58989127, 0.40572461], [0.77134053, 0.84112205],
                          [-0.2388767, -0.35763282]])
    assert_array_almost_equal(pls2.y_weights_, y_weights)

    ypred_2 = np.array([[180.33278555, 35.57034871, 56.06817703],
                        [192.06235219, 37.95306771, 54.12925192]])

    assert_array_almost_equal(pls2.predict(X[:2]), ypred_2)
Example #4
0
def test_pls():
    d = load_linnerud()
    X = d.data
    Y = d.target
    # 1) Canonical (symetric) PLS (PLS 2 blocks canonical mode A)
    # ===========================================================
    # Compare 2 algo.: nipals vs. svd
    # ------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=X.shape[1])
    pls_bynipals.fit(X, Y)
    pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=X.shape[1])
    pls_bysvd.fit(X, Y)
    # check equalities of loading (up to the sign of the second column)
    assert_array_almost_equal(
        pls_bynipals.x_loadings_,
        np.multiply(pls_bysvd.x_loadings_, np.array([1, -1, 1])),
        decimal=5,
        err_msg="nipals and svd implementation lead to different x loadings",
    )

    assert_array_almost_equal(
        pls_bynipals.y_loadings_,
        np.multiply(pls_bysvd.y_loadings_, np.array([1, -1, 1])),
        decimal=5,
        err_msg="nipals and svd implementation lead to different y loadings",
    )

    # Check PLS properties (with n_components=X.shape[1])
    # ---------------------------------------------------
    plsca = pls.PLSCanonical(n_components=X.shape[1])
    plsca.fit(X, Y)
    T = plsca.x_scores_
    P = plsca.x_loadings_
    Wx = plsca.x_weights_
    U = plsca.y_scores_
    Q = plsca.y_loadings_
    Wy = plsca.y_weights_

    def check_ortho(M, err_msg):
        K = np.dot(M.T, M)
        assert_array_almost_equal(K, np.diag(np.diag(K)), err_msg=err_msg)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(Wx, "x weights are not orthogonal")
    check_ortho(Wy, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(T, "x scores are not orthogonal")
    check_ortho(U, "y scores are not orthogonal")

    # Check X = TP' and Y = UQ' (with (p == q) components)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # center scale X, Y
    Xc, Yc, x_mean, y_mean, x_std, y_std = pls._center_scale_xy(X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T), err_msg="X != TP'")
    assert_array_almost_equal(Yc, np.dot(U, Q.T), err_msg="Y != UQ'")

    # Check that rotations on training data lead to scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Xr = plsca.transform(X)
    assert_array_almost_equal(Xr, plsca.x_scores_, err_msg="rotation on X failed")
    Xr, Yr = plsca.transform(X, Y)
    assert_array_almost_equal(Xr, plsca.x_scores_, err_msg="rotation on X failed")
    assert_array_almost_equal(Yr, plsca.y_scores_, err_msg="rotation on Y failed")

    # "Non regression test" on canonical PLS
    # --------------------------------------
    # The results were checked against the R-package plspm
    pls_ca = pls.PLSCanonical(n_components=X.shape[1])
    pls_ca.fit(X, Y)

    x_weights = np.array(
        [
            [-0.61330704, 0.25616119, -0.74715187],
            [-0.74697144, 0.11930791, 0.65406368],
            [-0.25668686, -0.95924297, -0.11817271],
        ]
    )
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    x_rotations = np.array(
        [
            [-0.61330704, 0.41591889, -0.62297525],
            [-0.74697144, 0.31388326, 0.77368233],
            [-0.25668686, -0.89237972, -0.24121788],
        ]
    )
    assert_array_almost_equal(pls_ca.x_rotations_, x_rotations)

    y_weights = np.array(
        [
            [+0.58989127, 0.7890047, 0.1717553],
            [+0.77134053, -0.61351791, 0.16920272],
            [-0.23887670, -0.03267062, 0.97050016],
        ]
    )
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    y_rotations = np.array(
        [
            [+0.58989127, 0.7168115, 0.30665872],
            [+0.77134053, -0.70791757, 0.19786539],
            [-0.23887670, -0.00343595, 0.94162826],
        ]
    )
    assert_array_almost_equal(pls_ca.y_rotations_, y_rotations)

    # 2) Regression PLS (PLS2): "Non regression test"
    # ===============================================
    # The results were checked against the R-packages plspm, misOmics and pls
    pls_2 = pls.PLSRegression(n_components=X.shape[1])
    pls_2.fit(X, Y)

    x_weights = np.array(
        [
            [-0.61330704, -0.00443647, 0.78983213],
            [-0.74697144, -0.32172099, -0.58183269],
            [-0.25668686, 0.94682413, -0.19399983],
        ]
    )
    assert_array_almost_equal(pls_2.x_weights_, x_weights)

    x_loadings = np.array(
        [
            [-0.61470416, -0.24574278, 0.78983213],
            [-0.65625755, -0.14396183, -0.58183269],
            [-0.51733059, 1.00609417, -0.19399983],
        ]
    )
    assert_array_almost_equal(pls_2.x_loadings_, x_loadings)

    y_weights = np.array(
        [
            [+0.32456184, 0.29892183, 0.20316322],
            [+0.42439636, 0.61970543, 0.19320542],
            [-0.13143144, -0.26348971, -0.17092916],
        ]
    )
    assert_array_almost_equal(pls_2.y_weights_, y_weights)

    y_loadings = np.array(
        [
            [+0.32456184, 0.29892183, 0.20316322],
            [+0.42439636, 0.61970543, 0.19320542],
            [-0.13143144, -0.26348971, -0.17092916],
        ]
    )
    assert_array_almost_equal(pls_2.y_loadings_, y_loadings)

    # 3) Another non-regression test of Canonical PLS on random dataset
    # =================================================================
    # The results were checked against the R-package plspm
    n = 500
    p_noise = 10
    q_noise = 5
    # 2 latents vars:
    np.random.seed(11)
    l1 = np.random.normal(size=n)
    l2 = np.random.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    X = latents + np.random.normal(size=4 * n).reshape((n, 4))
    Y = latents + np.random.normal(size=4 * n).reshape((n, 4))
    X = np.concatenate((X, np.random.normal(size=p_noise * n).reshape(n, p_noise)), axis=1)
    Y = np.concatenate((Y, np.random.normal(size=q_noise * n).reshape(n, q_noise)), axis=1)
    np.random.seed(None)
    pls_ca = pls.PLSCanonical(n_components=3)
    pls_ca.fit(X, Y)

    x_weights = np.array(
        [
            [0.65803719, 0.19197924, 0.21769083],
            [0.7009113, 0.13303969, -0.15376699],
            [0.13528197, -0.68636408, 0.13856546],
            [0.16854574, -0.66788088, -0.12485304],
            [-0.03232333, -0.04189855, 0.40690153],
            [0.1148816, -0.09643158, 0.1613305],
            [0.04792138, -0.02384992, 0.17175319],
            [-0.06781, -0.01666137, -0.18556747],
            [-0.00266945, -0.00160224, 0.11893098],
            [-0.00849528, -0.07706095, 0.1570547],
            [-0.00949471, -0.02964127, 0.34657036],
            [-0.03572177, 0.0945091, 0.3414855],
            [0.05584937, -0.02028961, -0.57682568],
            [0.05744254, -0.01482333, -0.17431274],
        ]
    )
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    x_loadings = np.array(
        [
            [0.65649254, 0.1847647, 0.15270699],
            [0.67554234, 0.15237508, -0.09182247],
            [0.19219925, -0.67750975, 0.08673128],
            [0.2133631, -0.67034809, -0.08835483],
            [-0.03178912, -0.06668336, 0.43395268],
            [0.15684588, -0.13350241, 0.20578984],
            [0.03337736, -0.03807306, 0.09871553],
            [-0.06199844, 0.01559854, -0.1881785],
            [0.00406146, -0.00587025, 0.16413253],
            [-0.00374239, -0.05848466, 0.19140336],
            [0.00139214, -0.01033161, 0.32239136],
            [-0.05292828, 0.0953533, 0.31916881],
            [0.04031924, -0.01961045, -0.65174036],
            [0.06172484, -0.06597366, -0.1244497],
        ]
    )
    assert_array_almost_equal(pls_ca.x_loadings_, x_loadings)

    y_weights = np.array(
        [
            [0.66101097, 0.18672553, 0.22826092],
            [0.69347861, 0.18463471, -0.23995597],
            [0.14462724, -0.66504085, 0.17082434],
            [0.22247955, -0.6932605, -0.09832993],
            [0.07035859, 0.00714283, 0.67810124],
            [0.07765351, -0.0105204, -0.44108074],
            [-0.00917056, 0.04322147, 0.10062478],
            [-0.01909512, 0.06182718, 0.28830475],
            [0.01756709, 0.04797666, 0.32225745],
        ]
    )
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    y_loadings = np.array(
        [
            [0.68568625, 0.1674376, 0.0969508],
            [0.68782064, 0.20375837, -0.1164448],
            [0.11712173, -0.68046903, 0.12001505],
            [0.17860457, -0.6798319, -0.05089681],
            [0.06265739, -0.0277703, 0.74729584],
            [0.0914178, 0.00403751, -0.5135078],
            [-0.02196918, -0.01377169, 0.09564505],
            [-0.03288952, 0.09039729, 0.31858973],
            [0.04287624, 0.05254676, 0.27836841],
        ]
    )
    assert_array_almost_equal(pls_ca.y_loadings_, y_loadings)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(pls_ca.x_weights_, "x weights are not orthogonal")
    check_ortho(pls_ca.y_weights_, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(pls_ca.x_scores_, "x scores are not orthogonal")
    check_ortho(pls_ca.y_scores_, "y scores are not orthogonal")
Example #5
0
def test_pls():
    # 1) Canonical (symetric) PLS (PLS 2 blocks canonical mode A)
    # ===========================================================
    # Compare 2 algo.: nipals vs. svd
    # ------------------------------
    pls_bynipals = pls.PLSCanonical(n_components=X.shape[1])
    pls_bynipals.fit(X, Y)
    pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=X.shape[1])
    pls_bysvd.fit(X, Y)
    # check equalities of loading (up to the sign of the second column)
    assert_array_almost_equal(
        pls_bynipals.x_loadings_,
        np.multiply(pls_bysvd.x_loadings_, np.array([1, -1, 1])), decimal=5,
        err_msg="nipals and svd implementation lead to different x loadings")

    assert_array_almost_equal(
        pls_bynipals.y_loadings_,
        np.multiply(pls_bysvd.y_loadings_, np.array([1, -1, 1])), decimal=5,
        err_msg="nipals and svd implementation lead to different y loadings")

    # Check PLS properties (with n_components=X.shape[1])
    # ---------------------------------------------------
    plsca = pls.PLSCanonical(n_components=X.shape[1])
    plsca.fit(X, Y)
    T = plsca.x_scores_
    P = plsca.x_loadings_
    Wx = plsca.x_weights_
    U = plsca.y_scores_
    Q = plsca.y_loadings_
    Wy = plsca.y_weights_

    def check_ortho(M, err_msg):
        K = np.dot(M.T, M)
        assert_array_almost_equal(K, np.diag(np.diag(K)), err_msg=err_msg)

    # Orthogonality of weights
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(Wx, "x weights are not orthogonal")
    check_ortho(Wy, "y weights are not orthogonal")

    # Orthogonality of latent scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    check_ortho(T, "x scores are not orthogonal")
    check_ortho(U, "y scores are not orthogonal")

    # Check X = TP' and Y = UQ' (with (p == q) components)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # center scale X, Y
    Xc, Yc, x_mean, y_mean, x_std, y_std =\
         pls._center_scale_xy(X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T),
        err_msg="X != TP'")
    assert_array_almost_equal(Yc, np.dot(U, Q.T),
        err_msg="Y != UQ'")

    # Check that rotations on training data lead to scores
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Xr = plsca.transform(X)
    assert_array_almost_equal(Xr, plsca.x_scores_,
        err_msg="rotation on X failed")
    Xr, Yr = plsca.transform(X, Y)
    assert_array_almost_equal(Xr, plsca.x_scores_,
        err_msg="rotation on X failed")
    assert_array_almost_equal(Yr, plsca.y_scores_,
        err_msg="rotation on Y failed")

    # "Non regression test" on canonical PLS
    # --------------------------------------
    pls_ca = pls.PLSCanonical(n_components=X.shape[1])
    pls_ca.fit(X, Y)

    x_weights = np.array(
        [[-0.61330704,  0.25616119, -0.74715187],
         [-0.74697144,  0.11930791,  0.65406368],
         [-0.25668686, -0.95924297, -0.11817271]])
    assert_array_almost_equal(pls_ca.x_weights_, x_weights)

    x_rotations = np.array(
        [[-0.61330704,  0.41591889, -0.62297525],
         [-0.74697144,  0.31388326,  0.77368233],
         [-0.25668686, -0.89237972, -0.24121788]])
    assert_array_almost_equal(pls_ca.x_rotations_, x_rotations)

    y_weights = np.array(
        [[ 0.58989127,  0.7890047 ,  0.1717553 ],
         [ 0.77134053, -0.61351791,  0.16920272],
         [-0.2388767 , -0.03267062,  0.97050016]])
    assert_array_almost_equal(pls_ca.y_weights_, y_weights)

    y_rotations = np.array(
        [[ 0.58989127,  0.7168115 ,  0.30665872],
         [ 0.77134053, -0.70791757,  0.19786539],
         [-0.2388767 , -0.00343595,  0.94162826]])
    assert_array_almost_equal(pls_ca.y_rotations_, y_rotations)


    # 2) Regression PLS (PLS2): "Non regression test"
    # ===============================================
    pls_2 = pls.PLSRegression(n_components=X.shape[1])
    pls_2.fit(X, Y)

    x_weights = np.array(
        [[-0.61330704, -0.00443647,  0.78983213],
         [-0.74697144, -0.32172099, -0.58183269],
         [-0.25668686,  0.94682413, -0.19399983]])
    assert_array_almost_equal(pls_2.x_weights_, x_weights)

    x_loadings = np.array(
        [[-0.61470416, -0.24574278,  0.78983213],
         [-0.65625755, -0.14396183, -0.58183269],
         [-0.51733059,  1.00609417, -0.19399983]])
    assert_array_almost_equal(pls_2.x_loadings_, x_loadings)

    y_weights = np.array(
        [[ 0.32456184,  0.29892183,  0.20316322],
         [ 0.42439636,  0.61970543,  0.19320542],
         [-0.13143144, -0.26348971, -0.17092916]])
    assert_array_almost_equal(pls_2.y_weights_, y_weights)

    y_loadings = np.array(
        [[ 0.32456184,  0.29892183,  0.20316322],
         [ 0.42439636,  0.61970543,  0.19320542],
         [-0.13143144, -0.26348971, -0.17092916]])
    assert_array_almost_equal(pls_2.y_loadings_, y_loadings)