Code example #1
def test_pls_canonical_basics():
    # Basic checks for PLSCanonical
    d = load_linnerud()
    X = d.data
    Y = d.target

    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)
    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)

    # Check X = TP' and Y = UQ'
    T = pls._x_scores
    P = pls.x_loadings_
    U = pls._y_scores
    Q = pls.y_loadings_
    # Need to scale first
    Xc, Yc, x_mean, y_mean, x_std, y_std = _center_scale_xy(
        X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T))
    assert_array_almost_equal(Yc, np.dot(U, Q.T))

    # Check that rotations on training data lead to scores
    Xt = pls.transform(X)
    assert_array_almost_equal(Xt, pls._x_scores)
    Xt, Yt = pls.transform(X, Y)
    assert_array_almost_equal(Xt, pls._x_scores)
    assert_array_almost_equal(Yt, pls._y_scores)

    # Check that inverse_transform works
    X_back = pls.inverse_transform(Xt)
    assert_array_almost_equal(X_back, X)
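A minimal, self-contained sketch of the same checks on synthetic data may help (all names and shapes below are illustrative, not from the test suite; current scikit-learn exposes the scores of PLSCanonical only through transform()):

import numpy as np
from sklearn.cross_decomposition import PLSCanonical

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
Y = X @ rng.normal(size=(3, 3)) + 0.1 * rng.normal(size=(50, 3))

pls = PLSCanonical(n_components=3).fit(X, Y)  # keep all 3 components
T, U = pls.transform(X, Y)                    # x scores and y scores

# Weight vectors are orthonormal
assert np.allclose(pls.x_weights_.T @ pls.x_weights_, np.eye(3), atol=1e-6)

# With all components kept, scores @ loadings.T recovers X after the same
# centering and scaling (ddof=1) that fit() applies internally
Xc = (X - X.mean(axis=0)) / X.std(axis=0, ddof=1)
assert np.allclose(Xc, T @ pls.x_loadings_.T, atol=1e-5)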
Code example #2
File: test_pls.py Project: robypoteau/scikit-learn
def test_sanity_check_pls_canonical():
    # Sanity check for PLSCanonical
    # The results were checked against the R-package plspm

    d = load_linnerud()
    X = d.data
    Y = d.target

    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    expected_x_weights = np.array(
        [
            [-0.61330704, 0.25616119, -0.74715187],
            [-0.74697144, 0.11930791, 0.65406368],
            [-0.25668686, -0.95924297, -0.11817271],
        ]
    )

    expected_x_rotations = np.array(
        [
            [-0.61330704, 0.41591889, -0.62297525],
            [-0.74697144, 0.31388326, 0.77368233],
            [-0.25668686, -0.89237972, -0.24121788],
        ]
    )

    expected_y_weights = np.array(
        [
            [+0.58989127, 0.7890047, 0.1717553],
            [+0.77134053, -0.61351791, 0.16920272],
            [-0.23887670, -0.03267062, 0.97050016],
        ]
    )

    expected_y_rotations = np.array(
        [
            [+0.58989127, 0.7168115, 0.30665872],
            [+0.77134053, -0.70791757, 0.19786539],
            [-0.23887670, -0.00343595, 0.94162826],
        ]
    )

    assert_array_almost_equal(np.abs(pls.x_rotations_), np.abs(expected_x_rotations))
    assert_array_almost_equal(np.abs(pls.x_weights_), np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_rotations_), np.abs(expected_y_rotations))
    assert_array_almost_equal(np.abs(pls.y_weights_), np.abs(expected_y_weights))

    x_rotations_sign_flip = np.sign(pls.x_rotations_ / expected_x_rotations)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_rotations_sign_flip = np.sign(pls.y_rotations_ / expected_y_rotations)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    assert_array_almost_equal(x_rotations_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_rotations_sign_flip, y_weights_sign_flip)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)

    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)
Code example #3
def test_convergence_fail():
    # Make sure ConvergenceWarning is raised if max_iter is too small
    d = load_linnerud()
    X = d.data
    Y = d.target
    pls_nipals = PLSCanonical(n_components=X.shape[1], max_iter=2)
    with pytest.warns(ConvergenceWarning):
        pls_nipals.fit(X, Y)
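Outside of pytest, the same behaviour can be observed with the standard warnings machinery; a small sketch, assuming only scikit-learn:

import warnings
from sklearn.cross_decomposition import PLSCanonical
from sklearn.datasets import load_linnerud
from sklearn.exceptions import ConvergenceWarning

X, Y = load_linnerud(return_X_y=True)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    PLSCanonical(n_components=X.shape[1], max_iter=2).fit(X, Y)

# at least one component failed to converge within 2 NIPALS iterations
assert any(issubclass(w.category, ConvergenceWarning) for w in caught)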
Code example #4
def feature_action_sensitivity(feature_type='TD4'):
    ''' For each feature, analyse its covariance with and without electrode shift. '''
    results = []

    subjects = ['subject_' + str(i + 1) for i in range(5)]

    channel_pos_list = ['S0',                                             # centre position
                        'U1', 'U2', 'D1', 'D2', 'L1', 'L2', 'R1', 'R2']  # up / down / left / right
    pos_num = len(channel_pos_list)

    actions = [i + 1 for i in range(7)]
    action_num = len(actions)                       # number of action types (7)

    if feature_type == 'TD4':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL']
    elif feature_type == 'TD5':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL', 'RMS']
    feat_num = len(feature_list)                    # feature dimensionality (4)

    groups = [i + 1 for i in range(4)]
    group_num = len(groups)                         # number of channels (4)
    group_span = group_num * feat_num
    action_span = feat_num * group_num              # 16

    train_dir = 'train4_250_100'

    results.append(['subject', 'action', 'feature', 'group', 'means_shift', 'std_shift'])
    plsca = PLSCanonical(n_components=2)
    for pos_idx, pos_name in enumerate(channel_pos_list[1:]):
        pos = pos_idx + 1
        for subject in subjects:
            trains, classes = data_load.load_feature_dataset(train_dir, subject, feature_type)
            # first half of the samples for training, second half for testing
            m = trains.shape[0] // 2
            X_train = trains[:m, group_span * pos: group_span * (pos + 1)]
            Y_train = trains[:m, :group_span]
            X_test = trains[m:, group_span * pos: group_span * (pos + 1)]
            Y_test = trains[m:, :group_span]

            plsca.fit(X_train, Y_train)
            X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
            X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

            filename = subject + '_' + pos_name
            plot_plsc_figure_two(X_train_r, Y_train_r, X_test_r, Y_test_r, filename)
Code example #5
def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps):
    """ Learns correlation matching (CM) over I_tr and T_tr
        and applies it to I_tr, T_tr, I_te, T_te
        
        
        Parameters
        ----------
        
        I_tr: np.ndarray [shape=(n_tr, d_I)]
            image data matrix for training
        
        T_tr: np.ndarray [shape=(n_tr, d_T)]
            text data matrix for training
        
        I_te: np.ndarray [shape=(n_te, d_I)]
            image data matrix for testing
        
        T_te: np.ndarray [shape=(n_te, d_T)]
            text data matrix for testing
        
        n_comps: int > 0 [scalar]
            number of canonical components to use
            
        Returns
        -------
        
        I_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            image data matrix represented in correlation space

        T_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            text data matrix represented in correlation space

        I_te_cca : np.ndarray [shape=(n_te, n_comps)]
            image data matrix represented in correlation space

        T_te_cca : np.ndarray [shape=(n_te, n_comps)]
            text data matrix represented in correlation space
        
        """


    # scale image and text data
    I_scaler = StandardScaler()
    I_tr = I_scaler.fit_transform(I_tr)
    I_te = I_scaler.transform(I_te)

    T_scaler = StandardScaler()
    T_tr = T_scaler.fit_transform(T_tr)
    T_te = T_scaler.transform(T_te)

    cca = PLSCanonical(n_components=n_comps, scale=False)
    cca.fit(I_tr, T_tr)

    I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr)
    I_te_cca, T_te_cca = cca.transform(I_te, T_te)

    return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
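A hedged usage sketch for the function above, on random matrices (the shapes are arbitrary, and the imports are the ones correlation_matching already relies on):

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSCanonical

rng = np.random.default_rng(0)
I_tr, T_tr = rng.normal(size=(200, 64)), rng.normal(size=(200, 32))
I_te, T_te = rng.normal(size=(50, 64)), rng.normal(size=(50, 32))

I_tr_cca, T_tr_cca, I_te_cca, T_te_cca = correlation_matching(
    I_tr, T_tr, I_te, T_te, n_comps=10)
print(I_tr_cca.shape, T_te_cca.shape)  # (200, 10) (50, 10)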
Code example #6
File: main.py Project: sealhuang/brainCodingToolbox
def plscorr_eval(train_fmri_ts, train_feat_ts, val_fmri_ts, val_feat_ts,
                 out_dir, mask_file):
    """Compute PLS correlation between brain activity and CNN activation."""
    train_feat_ts = train_feat_ts.reshape(-1, train_feat_ts.shape[3]).T
    val_feat_ts = val_feat_ts.reshape(-1, val_feat_ts.shape[3]).T
    train_fmri_ts = train_fmri_ts.T
    val_fmri_ts = val_fmri_ts.T

    # Iteration loop for different component number
    #for n in range(5, 19):
    #    print '--- Components number %s ---' %(n)
    #    plsca = PLSCanonical(n_components=n)
    #    plsca.fit(train_feat_ts, train_fmri_ts)
    #    pred_feat_c, pred_fmri_c = plsca.transform(val_feat_ts, val_fmri_ts)
    #    pred_fmri_ts = plsca.predict(val_feat_ts)
    #    # calculate correlation coefficient between truth and prediction
    #    r = corr2_coef(val_fmri_ts.T, pred_fmri_ts.T, mode='pair')
    #    # get top 20% corrcoef for model evaluation
    #    vsample = int(np.rint(0.2*len(r)))
    #    print 'Sample size for evaluation : %s' % (vsample)
    #    r.sort()
    #    meanr = np.mean(r[-1*vsample:])
    #    print 'Mean prediction corrcoef : %s' %(meanr)

    # model generation based on optimized CC number
    cc_num = 10
    plsca = PLSCanonical(n_components=cc_num)
    plsca.fit(train_feat_ts, train_fmri_ts)
    from sklearn.externals import joblib
    joblib.dump(plsca, os.path.join(out_dir, 'plsca_model.pkl'))
    plsca = joblib.load(os.path.join(out_dir, 'plsca_model.pkl'))

    # calculate correlation coefficient between truth and prediction
    pred_fmri_ts = plsca.predict(val_feat_ts)
    fmri_pred_r = corr2_coef(val_fmri_ts.T, pred_fmri_ts.T, mode='pair')
    mask = vutil.data_swap(mask_file)
    vxl_idx = np.nonzero(mask.flatten() == 1)[0]
    tmp = np.zeros_like(mask.flatten(), dtype=np.float64)
    tmp[vxl_idx] = fmri_pred_r
    tmp = tmp.reshape(mask.shape)
    vutil.save2nifti(tmp, os.path.join(out_dir, 'pred_fmri_r.nii.gz'))
    pred_feat_ts = pls_y_pred_x(plsca, val_fmri_ts)
    pred_feat_ts = pred_feat_ts.T.reshape(96, 14, 14, 540)
    np.save(os.path.join(out_dir, 'pred_feat.npy'), pred_feat_ts)

    # get PLS-CCA weights
    feat_cc, fmri_cc = plsca.transform(train_feat_ts, train_fmri_ts)
    np.save(os.path.join(out_dir, 'feat_cc.npy'), feat_cc)
    np.save(os.path.join(out_dir, 'fmri_cc.npy'), fmri_cc)
    feat_weight = plsca.x_weights_.reshape(96, 14, 14, cc_num)
    #feat_weight = plsca.x_weights_.reshape(96, 11, 11, cc_num)
    fmri_weight = plsca.y_weights_
    np.save(os.path.join(out_dir, 'feat_weights.npy'), feat_weight)
    np.save(os.path.join(out_dir, 'fmri_weights.npy'), fmri_weight)
    fmri_orig_ccs = get_pls_components(plsca.y_scores_, plsca.y_loadings_)
    np.save(os.path.join(out_dir, 'fmri_orig_ccs.npy'), fmri_orig_ccs)
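corr2_coef, pls_y_pred_x and the vutil helpers are specific to this project; as a rough NumPy sketch of the row-wise ('pair' mode) correlation the evaluation step appears to compute:

import numpy as np

def pairwise_corr(a, b):
    # Pearson correlation between matching rows of a and b (one value per voxel)
    a = a - a.mean(axis=1, keepdims=True)
    b = b - b.mean(axis=1, keepdims=True)
    num = (a * b).sum(axis=1)
    den = np.sqrt((a ** 2).sum(axis=1) * (b ** 2).sum(axis=1))
    return num / den

# e.g. fmri_pred_r = pairwise_corr(val_fmri_ts.T, pred_fmri_ts.T)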
Code example #7
def generate_transform_equations(trains_S0, trains_shift, **kw):
    print('generate transform equations.........')
    new_fold(transform_fold)
    chan_len = kw['chan_len']
    for idx, channel_pos in enumerate(kw['pos_list']):
        X_trains = trains_shift[:, idx * chan_len:(idx + 1) * chan_len]
        plsca = PLSCanonical(n_components=12)
        plsca.fit(X_trains, trains_S0)
        joblib.dump(plsca, transform_fold + '/cca_transform_' + kw['subject'] + '_' + channel_pos + '.model')
    print('generate transform equations finished.........')
Code example #8
File: visualize.py Project: juliakreutzer/loons
def drawFaces(emb1, emb2, wordRanking, n, reduction="cut"):
    """
    Plot Chernoff faces for n most/less interesting words
    From: https://gist.github.com/aflaxman/4043086
    :param n: if negative: less interesting
    :param reduction:
    :return:
    """
    s1 = None
    s2 = None
    if reduction=="cut":
        s1 = emb1.getSimMatrix()[0:,0:18]
        s2 = emb2.getSimMatrix()[0:,0:18]
    elif reduction=="svd":
        s1 = TruncatedSVD(n_components=k).fit_transform(emb1.getSimMatrix())
        s2 = TruncatedSVD(n_components=k).fit_transform(emb2.getSimMatrix())
    elif reduction=="cca": #use orginal embeddings, not similarity matrix for reduction
        cca = PLSCanonical(n_components=18)
        cca.fit(emb1.m, emb2.m)
        s1, s2 = cca.transform(emb1.m, emb2.m)
    interesting = list()
    name = str(n)+"."+reduction
    if n<0: #plot uninteresting words
        n *= -1
        interesting = [wordRanking[::-1][i] for i in xrange(n)]
    else:
        interesting = [wordRanking[i] for i in xrange(n)]
    fig = plt.figure(figsize=(11,11))
    c = 0
    for i in range(n):
        word = interesting[i]
        j = emb1.d[word]
        ax = fig.add_subplot(n,2,c+1,aspect='equal')
        mpl_cfaces.cface(ax, *s1[j]) #nice for similarity matrix *s1[j][:18]
        ax.axis([-1.2,1.2,-1.2,1.2])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(word)
        ax2 = fig.add_subplot(n,2,c+2,aspect='equal')
        mpl_cfaces.cface(ax2, *s2[j])
        ax2.axis([-1.2,1.2,-1.2,1.2])
        ax2.set_xticks([])
        ax2.set_yticks([])
        ax2.set_title(word)
        c += 2
    plotname = "plots/"+NAME+".cface_s1s2_"+name+".png"
    fig.savefig(plotname)
    print("\tSaved Chernoff faces plot in '%s'" % (plotname))
Code example #9
File: pls_canonical.py Project: vickyvishal/lale
class _PLSCanonicalImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)
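Op is not shown in this excerpt; in lale wrapper modules it is conventionally the wrapped scikit-learn operator, here presumably sklearn.cross_decomposition.PLSCanonical. A usage sketch under that assumption (the alias below stands in for the module-level binding):

import numpy as np
from sklearn.cross_decomposition import PLSCanonical as Op  # assumption, not shown in the excerpt

rng = np.random.default_rng(0)
X = rng.normal(size=(40, 5))
Y = rng.normal(size=(40, 3))

impl = _PLSCanonicalImpl(n_components=2)
impl.fit(X, Y)
print(impl.transform(X).shape)  # (40, 2)
print(impl.predict(X).shape)    # (40, 3)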
Code example #10
import random

import numpy as np
from sklearn.cross_decomposition import PLSCanonical


def pls(x, y, num_cc):
    random.seed(42)
    plsca = PLSCanonical(n_components=int(num_cc), algorithm='svd')
    fit = plsca.fit(x, y)
    u = fit.x_weights_
    v = fit.y_weights_
    a1 = (np.asarray(x) @ u).T
    d = a1 @ np.asarray(y) @ v
    # one diagonal entry per canonical component (the original hard-coded range(0, 30))
    ds = [d[i, i] for i in range(int(num_cc))]
    return u, v, ds
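A usage sketch on random data (dimensions are arbitrary; num_cc must not exceed the minimum of n_samples and the two feature counts for PLSCanonical):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 20))
y = rng.normal(size=(100, 15))

u, v, ds = pls(x, y, num_cc=5)
print(u.shape, v.shape, len(ds))  # (20, 5) (15, 5) 5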
Code example #11
def PLS_Canonical(csv_data,
                  point_index,
                  sub_index,
                  var_name,
                  train=None,
                  components=None):
    X_array = []
    temp_array = []
    for j in csv_data:
        temp_array = j[point_index - 1:point_index + 8]
        X_array.append(temp_array)
    X_array = np.array(X_array)
    if components is None:
        components = np.shape(X_array)[1]
    for i in range(7):
        Y_array = np.array(csv_data[:, sub_index - 1 + i])
        plsca = PLSCanonical(n_components=1)
        plsca.fit(X_array, Y_array)
        print(var_name[sub_index + i])
        print("R^2 =", np.around(plsca.score(X_array, Y_array), decimals=2))
Code example #12
File: visualize.py Project: juliakreutzer/loons
    def getCCARanking(self, filter=None):
        """
        Compare how far apart words are when projected into the common space by CCA
        :return:
        """
        cca = PLSCanonical(n_components=self.n)
        cca.fit(self.emb1.m, self.emb2.m)
        m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)

        # get distances between vectors
        assert self.emb1.vocab_size == self.emb2.vocab_size
        distDict = dict()
        for i in range(self.emb1.vocab_size):
            v1 = m1transformed[i]
            v2 = m2transformed[i]
            w = self.emb1.rd[i]
            distDict[w] = 1 - Similarity.euclidean(v1, v2)
        ranked = sorted(distDict.items(), key=itemgetter(1), reverse=True)
        if filter is not None:
            # keep the sorted order when restricting to the filter set
            ranked = [(w, s) for (w, s) in ranked if w in filter]
        return ranked
Code example #13
File: visualize.py Project: juliakreutzer/loons
    def plotClustersCCA(self, filter=None):
        """
        Plot clusters in 2-dim CCA space: comparable across embeddings
        :return:
        """
        if len(self.cluster1) <= 1:
            cmap1 = plt.get_cmap('jet', 2)
        else:
            cmap1 = plt.get_cmap('jet', len(self.cluster1))
        cmap1.set_under('gray')
        if len(self.cluster2) <= 1:
            cmap2 = plt.get_cmap('jet', 2)
        else:
            cmap2 = plt.get_cmap('jet', len(self.cluster2))
        cmap2.set_under('gray')
        cca = PLSCanonical(n_components=2)
        cca.fit(self.emb1.m, self.emb2.m)
        m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)
        labels1 = [self.emb1.rd[i] for i in range(self.emb1.vocab_size)]
        colors1 = [self.word2cluster1[self.emb1.rd[i]] for i in range(self.emb1.vocab_size)]
        labels2 = [self.emb2.rd[i] for i in range(self.emb2.vocab_size)]
        colors2 = [self.word2cluster2[self.emb2.rd[i]] for i in range(self.emb2.vocab_size)]
        if filter is not None:
            print("\tFiltering samples to plot")
            filteredIds = [self.emb1.d[w] for w in filter]  # get ids for words in filter
            m1transformed = m1transformed[filteredIds]
            m2transformed = m2transformed[filteredIds]
            labels1 = [labels1[i] for i in filteredIds]
            labels2 = [labels2[i] for i in filteredIds]
            colors1 = [colors1[i] for i in filteredIds]
            colors2 = [colors2[i] for i in filteredIds]
        elif m1transformed.shape[0] > 100:  # sample indices to display, otherwise it's too messy
            filteredIds = np.random.randint(low=0, high=m1transformed.shape[0], size=100)
            m1transformed = m1transformed[filteredIds]
            m2transformed = m2transformed[filteredIds]
            labels1 = [labels1[i] for i in filteredIds]
            labels2 = [labels2[i] for i in filteredIds]
            colors1 = [colors1[i] for i in filteredIds]
            colors2 = [colors2[i] for i in filteredIds]
        plotWithLabelsAndColors(m1transformed, labels1, colors=colors1, cmap=cmap1, filename="plots/"+NAME+".cca1.png", dimRed="CCA")
        plotWithLabelsAndColors(m2transformed, labels2, colors=colors2, cmap=cmap2, filename="plots/"+NAME+".cca2.png", dimRed="CCA")
Code example #14
X_train = X[:n // 2]
Y_train = Y[:n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]

print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

# #############################################################################
# Canonical (symmetric) PLS

# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
plt.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
plt.xlabel("x scores")
plt.ylabel("y scores")
plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
          np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
plt.xticks(())
Code example #15
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import PLSCanonical
from sklearn.neighbors import KNeighborsClassifier
import math
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

dataSet = datasets.load_digits()
data = dataSet["data"]
target = dataSet["target"]

plsca = PLSCanonical(n_components=2)
plsca.fit(data, target)

X_train_r, Y_train_r = plsca.transform(data, target)

knn = math.sqrt(len(X_train_r))
knn = KNeighborsClassifier(n_neighbors=int(knn))

Y_train_r = [int(Y_train_r[i][0]) for i in range(0, len(Y_train_r))]  # first y-score column as an integer label

k = knn.fit(X_train_r, Y_train_r)
print(k.score(X_train_r, Y_train_r))
knn = KNeighborsClassifier(n_neighbors=4)

sfs = SFS(knn,
          k_features=3,
          forward=True,
          floating=False,
          verbose=2,
Code example #16
File: 04.py Project: Isur/artificial-intelligence
    knn = KNeighborsClassifier(round(math.sqrt(mnist.data.shape[0])),
                               metric='euclidean',
                               weights='uniform')
    knn.fit(lda_train, train_targets)
    print("Score for ", i, " components: ", knn.score(lda_test, test_targets))
    if max_value < knn.score(lda_test, test_targets):
        max_value = knn.score(lda_test, test_targets)
        max_number = i
print("Max for: ", max_number, " is: ", max_value)

# Zadanie 4:
max_value = 0
max_number = 0
for i in range(1, 6):
    plsca = PLSCanonical(n_components=i)
    plsca.fit(train, train_targets)
    pls_train = plsca.transform(train)
    pls_test = plsca.transform(test)  # reuse the projection fitted on the training split
    knn = KNeighborsClassifier(round(math.sqrt(mnist.data.shape[0])),
                               metric='euclidean',
                               weights='uniform')
    knn.fit(pls_train, train_targets)
    print("Score for ", i, " components: ", knn.score(pls_test, test_targets))
    if max_value < knn.score(pls_test, test_targets):
        max_value = knn.score(pls_test, test_targets)
        max_number = i
print("Max for: ", max_number, " is: ", max_value)

# Zadanie 5:
knn = KNeighborsClassifier(round(math.sqrt(mnist.data.shape[0])))
sfs = SFS(knn,
Code example #17
File: Zad4.py Project: justynias/AIS
# Then check the accuracy of the kNN classifier on the test set restricted to the selected
# subset of features. Take the parameter k as the square root of the number of objects in the set.
# For what number of features were the best results achieved?
from sklearn import datasets
from sklearn import model_selection
from sklearn.neighbors import KNeighborsClassifier
import math
from sklearn.cross_decomposition import PLSCanonical

mnist_dataset = datasets.load_digits()
X = mnist_dataset.data
Y = mnist_dataset.target
target_names = mnist_dataset.target_names
train, test, train_targets, test_targets = model_selection.train_test_split(X, Y, train_size=0.5,test_size=0.5)

                                                                            
max_score = 0
max_n_components = 0
for i in range(1, 10):
    plsca = PLSCanonical(n_components=i)
    plsca.fit(train, train_targets)
    X_r = plsca.transform(train)
    Y_r = plsca.transform(test)  # reuse the projection fitted on the training split
    clf = KNeighborsClassifier(round(math.sqrt(X.shape[0])), weights="uniform", metric="euclidean")
    clf.fit(X_r, train_targets)
    print(i, ":", clf.score(Y_r, test_targets))
    if max_score < clf.score(Y_r, test_targets):
        max_score = clf.score(Y_r, test_targets)
        max_n_components = i

print("Best result for:", max_n_components)
Code example #18
# correct, but not accurate
from sklearn.model_selection import train_test_split  # sklearn.cross_validation in older scikit-learn releases
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical

df = pd.read_csv('newdata.csv')
x = df.drop(['tag'], axis=1)
y = df.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

plsr = PLSRegression()
plsr.fit(X_train, Y_train)

plsc = PLSCanonical()
plsc.fit(X_train, Y_train)

print(plsr.score(X_test, Y_test))
print(plsc.score(X_test, Y_test))
Code example #19
File: linear.py Project: sunshiding/cca_zoo
class Wrapper:
    """
    This is a wrapper class for linear, regularised and kernel CCA, multiset CCA and generalized CCA.
    We create an instance with a method and a number of latent dimensions.
    If we have more than 2 views we need to use generalized methods, but we can also override this in the 2-view
    case with the generalized parameter.

    The class has a number of methods:

    fit(): gives us train correlations and stores the variables needed for out-of-sample prediction as well as some
    method-specific variables

    cv_fit(): allows us to perform a hyperparameter search and then fit the model using the optimal hyperparameters

    predict_corr(): allows us to predict the out-of-sample correlation for supplied views

    predict_view(): allows us to predict a reconstruction of missing views from the supplied views

    transform_view(): allows us to transform given views to the latent variable space

    The remaining methods are internal helpers.
    """

    def __init__(self, latent_dims: int = 1, method: str = 'l2', generalized: bool = False, max_iter: int = 500,
                 tol=1e-6):
        self.latent_dims = latent_dims
        self.method = method
        self.generalized = generalized
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, *args, params=None):
        if params is None:
            params = {}
        self.params = params
        if len(args) > 2:
            self.generalized = True
            print('more than 2 views therefore switched to generalized')
        if 'c' not in self.params:
            self.params = {'c': [0] * len(args)}
        if self.method == 'kernel':
            # Linear kernel by default
            if 'kernel' not in self.params:
                self.params['kernel'] = 'linear'
            # First-order polynomial by default
            if 'degree' not in self.params:
                self.params['degree'] = 1
            # Unit kernel width by default
            if 'sigma' not in self.params:
                self.params['sigma'] = 1.0

        # Fit returns in-sample score vectors and correlations as well as models with transform functionality
        self.dataset_list = []
        self.dataset_means = []
        for dataset in args:
            self.dataset_means.append(dataset.mean(axis=0))
            self.dataset_list.append(dataset - dataset.mean(axis=0))

        if self.method == 'kernel':
            self.fit_kcca = cca_zoo.KCCA.KCCA(self.dataset_list[0], self.dataset_list[1], params=self.params,
                                              latent_dims=self.latent_dims)
            self.score_list = [self.fit_kcca.U, self.fit_kcca.V]
        elif self.method == 'pls':
            self.fit_scikit_pls(self.dataset_list[0], self.dataset_list[1])
        elif self.method == 'scikit':
            self.fit_scikit_cca(self.dataset_list[0], self.dataset_list[1])
        elif self.method == 'mcca':
            self.fit_mcca(*self.dataset_list)
        elif self.method == 'gcca':
            self.fit_gcca(*self.dataset_list)
        else:
            self.outer_loop(*self.dataset_list)
            if self.method[:4] == 'tree':
                self.tree_list = [self.tree_list[i] for i in range(len(args))]
                self.weights_list = [np.expand_dims(tree.feature_importances_, axis=1) for tree in self.tree_list]
            else:
                self.rotation_list = []
                for i in range(len(args)):
                    self.rotation_list.append(
                        self.weights_list[i] @ pinv2(self.loading_list[i].T @ self.weights_list[i], check_finite=False))
        self.train_correlations = self.predict_corr(*args)
        return self

    def cv_fit(self, *args, param_candidates=None, folds: int = 5, verbose: bool = False):
        best_params = cross_validate(*args, max_iter=self.max_iter, latent_dims=self.latent_dims, method=self.method,
                                     param_candidates=param_candidates, folds=folds,
                                     verbose=verbose, tol=self.tol)
        self.fit(*args, params=best_params)
        return self

    def bayes_cv_fit(self, *args, param_candidates=None, folds: int = 5, verbose: bool = False):
        space = {
            "n_estimators": hp.choice("n_estimators", [100, 200, 300, 400, 500, 600]),
            "max_depth": hp.quniform("max_depth", 1, 15, 1),
            "criterion": hp.choice("criterion", ["gini", "entropy"]),
        }

        trials = Trials()

        best_params = fmin(
            fn=Wrapper(),
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials
            )
        self.fit(*args, params=best_params)
        return self

    def predict_corr(self, *args):
        # Takes two datasets and predicts their out of sample correlation using trained model
        transformed_views = self.transform_view(*args)
        all_corrs = []
        for x, y in itertools.product(transformed_views, repeat=2):
            all_corrs.append(np.diag(np.corrcoef(x.T, y.T)[:self.latent_dims, self.latent_dims:]))
        all_corrs = np.array(all_corrs).reshape((len(args), len(args), self.latent_dims))
        return all_corrs

    def predict_view(self, *args):
        # Regress original given views onto target
        transformed_views = self.transform_view(*args)

        # Get the regression from the training data with available views
        predicted_target = np.mean([transformed_views[i] for i in range(len(args)) if args[i] is not None], axis=0)

        predicted_views = []
        for i, view in enumerate(args):
            if view is None:
                predicted_views.append(predicted_target @ pinv2(self.weights_list[i]))
            else:
                predicted_views.append(view)
        for i, predicted_view in enumerate(predicted_views):
            predicted_views[i] += self.dataset_means[i]
        return predicted_views

    def transform_view(self, *args):
        # Demeaning
        new_views = []
        for i, new_view in enumerate(args):
            if new_view is None:
                new_views.append(None)
            else:
                new_views.append(new_view - self.dataset_means[i])

        if self.method == 'kernel':
            transformed_views = list(self.fit_kcca.transform(new_views[0], new_views[1]))
        elif self.method == 'pls':
            transformed_views = list(self.PLS.transform(new_views[0], new_views[1]))
        elif self.method[:4] == 'tree':
            transformed_views = []
            for i, new_view in enumerate(new_views):
                if new_view is None:
                    transformed_views.append(None)
                else:
                    transformed_views.append(self.tree_list[i].predict(new_view))
        else:
            transformed_views = []
            for i, new_view in enumerate(new_views):
                if new_view is None:
                    transformed_views.append(None)
                else:
                    transformed_views.append(new_view @ self.rotation_list[i])
        # d x n x k
        return transformed_views

    def outer_loop(self, *args):
        # list of d: p x k
        self.weights_list = [np.zeros((args[i].shape[1], self.latent_dims)) for i in range(len(args))]
        # list of d: n x k
        self.score_list = [np.zeros((args[i].shape[0], self.latent_dims)) for i in range(len(args))]
        # list of d:
        self.loading_list = [np.zeros((args[i].shape[1], self.latent_dims)) for i in range(len(args))]

        if len(args) == 2:
            C_train = args[0].T @ args[1]
            C_train_res = C_train.copy()
        else:
            C_train_res = None

        residuals = list(args)
        # For each of the dimensions
        for k in range(self.latent_dims):
            self.inner_loop = cca_zoo.alternating_least_squares.ALS_inner_loop(*residuals, C=C_train_res,
                                                                               generalized=self.generalized,
                                                                               params=self.params,
                                                                               method=self.method,
                                                                               max_iter=self.max_iter)
            for i in range(len(args)):
                if self.method[:4] == 'tree':
                    self.tree_list = self.inner_loop.weights
                else:
                    self.weights_list[i][:, k] = self.inner_loop.weights[i]
                    self.score_list[i][:, k] = self.inner_loop.targets[i, :]
                    self.loading_list[i][:, k] = residuals[i].T @ self.score_list[i][:, k] / np.linalg.norm(
                        self.score_list[i][:, k])
                    residuals[i] -= np.outer(self.score_list[i][:, k] / np.linalg.norm(self.score_list[i][:, k]),
                                             self.loading_list[i][:, k])
        return self

    def fit_scikit_cca(self, train_set_1, train_set_2):
        self.cca = CCA(n_components=self.latent_dims, scale=False)
        self.cca.fit(train_set_1, train_set_2)
        self.score_list = [self.cca.x_scores_, self.cca.y_scores_]
        self.weights_list = [self.cca.x_weights_, self.cca.y_weights_]
        self.loading_list = [self.cca.x_loadings_, self.cca.y_loadings_]
        self.rotation_list = [self.cca.x_rotations_, self.cca.y_rotations_]
        return self

    def fit_scikit_pls(self, train_set_1, train_set_2):
        self.PLS = PLSCanonical(n_components=self.latent_dims, scale=False)
        self.PLS.fit(train_set_1, train_set_2)
        self.score_list = [self.PLS.x_scores_, self.PLS.y_scores_]
        self.weights_list = [self.PLS.x_weights_, self.PLS.y_weights_]
        return self

    def fit_mcca(self, *args):
        all_views = np.concatenate(args, axis=1)
        C = all_views.T @ all_views
        # Can regularise by adding to diagonal
        D = block_diag(*[(1 - self.params['c'][i]) * m.T @ m + self.params['c'][i] * np.eye(m.shape[1]) for i, m in
                         enumerate(args)])
        R = cholesky(D, lower=False)
        whitened = np.linalg.inv(R.T) @ C @ np.linalg.inv(R)
        [eigvals, eigvecs] = np.linalg.eig(whitened)
        idx = np.argsort(eigvals, axis=0)[::-1]
        eigvecs = eigvecs[:, idx].real
        eigvals = eigvals[idx].real
        eigvecs = np.linalg.inv(R) @ eigvecs
        splits = np.cumsum([0] + [view.shape[1] for view in args])
        self.weights_list = [eigvecs[splits[i]:splits[i + 1], :self.latent_dims] for i in range(len(args))]
        self.rotation_list = self.weights_list
        self.score_list = [self.dataset_list[i] @ self.weights_list[i] for i in range(len(args))]

    def fit_gcca(self, *args):
        Q = []
        for i, view in enumerate(args):
            view_cov = view.T @ view
            view_cov = (1 - self.params['c'][i]) * view_cov + self.params['c'][i] * np.eye(view_cov.shape[0])
            Q.append(view @ np.linalg.inv(view_cov) @ view.T)
        Q = np.sum(Q, axis=0)
        [eigvals, eigvecs] = np.linalg.eig(Q)
        idx = np.argsort(eigvals, axis=0)[::-1]
        eigvecs = eigvecs[:, idx].real
        eigvals = eigvals[idx].real
        self.weights_list = [np.linalg.pinv(view) @ eigvecs[:, :self.latent_dims] for view in args]
        self.rotation_list = self.weights_list
        self.score_list = [self.dataset_list[i] @ self.weights_list[i] for i in range(len(args))]
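A hedged usage sketch for the 'pls' path of this wrapper. It assumes the module's own dependencies (cca_zoo, scipy's pinv2, hyperopt) resolve, and that the installed scikit-learn still exposes x_scores_ on PLSCanonical, as this codebase expects:

import numpy as np

rng = np.random.default_rng(0)
view_1 = rng.normal(size=(100, 10))
view_2 = rng.normal(size=(100, 8))

# method='pls' routes fit() to fit_scikit_pls(), i.e. scikit-learn's PLSCanonical
model = Wrapper(latent_dims=2, method='pls')
model.fit(view_1, view_2)
print(model.train_correlations.shape)  # (2, 2, 2): view x view x latent dimension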
Code example #20
if not include_negative_weights:
    # set negative connectivities to 0 (elementwise clip)
    edge_data = np.clip(edge_data, 0, None)

# re-split data (3 ways) for CCA
X1_train = edge_data[:140, :]
X2_train = edge_data[140:280, :]
X2_remain = edge_data[280:, :]
#cca = CCA(n_components =2)
#cca.fit(X1_train, X2_train)
cca = PLSCanonical(n_components=100)
cca.fit(X1_train, X2_train)
block_1_transformed, block_2_transformed = cca.transform(X1_train,
                                                         X2_train,
                                                         copy=False)
block_3_transformed = np.dot(X2_remain, cca.y_rotations_)

edge_data_transformed = np.vstack(
    (block_1_transformed, block_2_transformed, block_3_transformed))
# initialise the classifier

clf = svm.SVC(kernel='precomputed')

# optional shuffle
perm = np.random.permutation(n_subjects)
#print perm
#print n_subjects
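One caveat worth noting: transform() centres (and, with the default scale=True, scales) its inputs before applying the rotations, whereas the raw np.dot above projects the unscaled block. A minimal sketch of the exact relationship, on illustrative data:

import numpy as np
from sklearn.cross_decomposition import PLSCanonical

rng = np.random.default_rng(0)
X, Y = rng.normal(size=(60, 6)), rng.normal(size=(60, 6))
X_new, Y_new = rng.normal(size=(10, 6)), rng.normal(size=(10, 6))

cca = PLSCanonical(n_components=2).fit(X, Y)

# A manual projection must reuse the training mean and (ddof=1) std
manual = ((Y_new - Y.mean(axis=0)) / Y.std(axis=0, ddof=1)) @ cca.y_rotations_
_, auto = cca.transform(X_new, Y_new)
assert np.allclose(manual, auto)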
Code example #21
File: Module2.py Project: manuwhs/UC3M-Projects
    plt.xlim(1, np.amax(nComponents))
    plt.title('PLS SVD accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.legend(['LR', 'LDA', 'GNB', 'Linear SVM', 'rbf SVM'],
               loc='lower right')
    plt.grid(True)

if (0):
    #%% PLS Canonical
    nComponents = np.arange(1, nClasses + 1)
    plsCanScores = np.zeros((5, len(nComponents)))
    for i, n in enumerate(nComponents):
        plscan = PLSCanonical(n_components=n)
        plscan.fit(Xtrain, Ytrain)
        XtrainT = plscan.transform(Xtrain)
        XtestT = plscan.transform(Xtest)
        plsCanScores[:, i] = util.classify(XtrainT, XtestT, labelsTrain,
                                           labelsTest)

    plscan = PLSCanonical(n_components=2)
    plscan.fit(Xtrain, Ytrain)
    xt = plscan.transform(Xtrain)
    fig = plt.figure()
    util.plotData(fig, xt, labelsTrain, classColors)
    plt.title('First 2 components of projected data')

    #%% Plot accuracies for PLSSVD
    plt.figure()
    for i in range(5):
        plt.plot(nComponents, plsSvdScores[i, :], lw=3)
Code example #22
plt.plot([0, 1], [0, 1])
plt.xlim([0, 1])
plt.gca().set_aspect('equal', adjustable='box')
plt.legend(['Cell volume', 'Age', 'Both'])

# NB: strong collinearity between Age and Volume

# Transition rate prediction using PLS
X = dfc_g1[['vol_sm', 'Age', 'gr_sm']]  # Design matrix
y = dfc_g1['G1S_logistic']  # Response var
# Drop NaN rows
I = np.isnan(dfc_g1['gr_sm'])
X = X.loc[~I].copy()
y = y[~I]
pls_model = PLSCanonical()
pls_model.fit(scale(X), y)

X_c, y_c = pls_model.transform(scale(X), y)

# Multiple linear regression on birth size and growth rate
df['bvol'] = df['Birth volume']
df['exp_gr'] = df['Exponential growth rate']
df['g1_len'] = df['G1 length']
model = smf.ols('g1_len ~ exp_gr + bvol', data=df).fit()
model.summary()
print(model.pvalues)

# Delete S/G2 after first time point
g1s_marked = []
for c in collated_filtered:
    c = c[c['Phase'] != 'Daughter G1'].copy()
Code example #23
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):

    start_time = time.time()
    if 'log_fold' in kw:
        log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']
    cv = 3
    results = []
    results.append(
        ['Feat', 'Algorithm','n_components', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_'+kw['feature_type']+'_intra'

    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)

    data_num = trains.shape[0] // action_num

    scores = sklearn.cross_validation.cross_val_score(clf, trains, classes, cv=cv)
    results.append(['feat_TD4_cv_'+str(cv), 'lda', 'ALL', 0, scores.mean(), scores.std()])
    
    # Within-group training strategy: 9 groups of data
    print('within-group training.............')
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:,idx*chan_len: idx*chan_len+chan_len]

        scores = sklearn.cross_validation.cross_val_score(
            clf, trains_intra, classes, cv=cv)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, channel_pos, scores.mean(), scores.std()])


    # Centre-training strategy: train on S0, test on each shifted position
    print('centre-training strategy.............')
    trains_intra_S0 = trains[:,0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, 'train S0' + ' test ' + channel_pos, scores.mean(), scores.std()])
    
    # Group-training strategy (distinct from within-group training): S0 plus shifted data, k-fold
    print('group-training strategy.............')
    trains_intra_S0 = trains[:,0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros( (itera,) )
        # stds = np.zeros( (itera,) )
        itera -= 1
        trains_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], dtype=int)
            test_idx_all = np.array([], dtype=int)

            for action_idx in range(action_num):
                train_idx_all = np.concatenate( (train_idx_all, train_idx*(action_idx+1)), axis=0)
                test_idx_all = np.concatenate( (test_idx_all, test_idx*(action_idx+1)), axis=0)

            X_train = np.concatenate( (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]), axis=0)
            y_train = np.concatenate( (classes[train_idx_all], classes[train_idx_all]), axis=0)

            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]

            # X_test = trains_shift
            # y_test = classes

            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()
            
            itera -= 1
        # print scores
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, 'S0 + '+channel_pos, np.mean(scores), np.std(scores)])
    
    # CCA-based training strategy with k-fold cross-validation
    print('CCA training strategy.............')
    trains_S0 = trains[:,0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]              # subspace dimensionalities
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:
        
        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros( (itera,) )
            stds = np.zeros( (itera,) )
            itera -= 1
            trains_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], dtype=int)
                test_idx_all = np.array([], dtype=int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate( (train_idx_all, train_idx*(action_idx+1)), axis=0)
                    test_idx_all = np.concatenate( (test_idx_all, test_idx*(action_idx+1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(trains_shift, trains_S0)
                X_trains = np.concatenate( (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate( (classes, classes[train_idx_all]), axis=0)
                score = clf.fit(X_trains, y_trains).score(trains_shift_cca[test_idx_all], classes[test_idx_all])

                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1
            
            results.append(['feat_TD4_cv_'+str(cv), 'lda_cca', n_components, 'S0 + '+channel_pos, np.mean(scores), np.std(scores)])
    
    log_result(results, log_fold + '/' + log_file + '_action_1-'+str(action_num), 2)
    print('----Log Fold:', log_fold, ', log_file:', log_file + '_action_1-' + str(action_num))
    print('----training TD4 time elapsed:', time.time() - start_time)
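The CCA strategy above distils to: map shifted-channel and centre-channel features into a shared latent space with PLSCanonical, then train the classifier there. A self-contained sketch on synthetic data with current APIs (sklearn.lda.LDA and sklearn.cross_validation are long gone; their modern replacements appear below), scoring on the training samples purely for brevity:

import numpy as np
from sklearn.cross_decomposition import PLSCanonical
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.default_rng(0)
n_per_class, n_classes, p = 30, 7, 16
classes = np.repeat(np.arange(n_classes), n_per_class)
prototypes = rng.normal(size=(n_classes, p))
trains_S0 = prototypes[classes] + 0.5 * rng.normal(size=(len(classes), p))    # centre position
mixing = rng.normal(size=(p, p))
trains_shift = trains_S0 @ mixing + 0.5 * rng.normal(size=(len(classes), p))  # simulated shift

plsca = PLSCanonical(n_components=8).fit(trains_shift, trains_S0)
shift_cca, S0_cca = plsca.transform(trains_shift, trains_S0)

# Train LDA on both views in the shared latent space, score on the shifted view
clf = LinearDiscriminantAnalysis()
clf.fit(np.vstack([S0_cca, shift_cca]), np.concatenate([classes, classes]))
print(clf.score(shift_cca, classes))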
Code example #24
    plt.plot(Y_train_r[:, 0], Y_train_r[:, 1], "*b", label="train")
    plt.plot(Y_test_r[:, 0], Y_test_r[:, 1], "*r", label="test")
    plt.xlabel("Y comp. 1")
    plt.ylabel("Y comp. 2")
    plt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)' %
              numpy.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.savefig(output_file)
    plt.close()


# PLSCA
plsca = PLSCanonical(n_components=2)
plsca.fit(Xtrain, Ytrain)
# PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
#       scale=True, tol=1e-06)
X_train_r, Y_train_r = plsca.transform(Xtrain, Ytrain)
X_test_r, Y_test_r = plsca.transform(Xtest, Ytest)
do_plot(X_train_r, Y_train_r, X_test_r, Y_test_r,
        '%s/PLSCA_2comp_norm.pdf' % output_folder)

# CCA
# probably not necessary, but just in case the data was modified in some way
Ytrain = norm.loc[train, :]
Ytest = norm.loc[holdout, :]
Xtrain = numpy.array(X.loc[train, :])
Xtest = X.loc[holdout, :]
cca = CCA(n_components=2)
cca.fit(Xtrain, Ytrain)
Code example #25
def test_sanity_check_pls_canonical_random():
    # Sanity check for PLSCanonical on random data
    # The results were checked against the R-package plspm
    n = 500
    p_noise = 10
    q_noise = 5
    # 2 latents vars:
    rng = check_random_state(11)
    l1 = rng.normal(size=n)
    l2 = rng.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    X = latents + rng.normal(size=4 * n).reshape((n, 4))
    Y = latents + rng.normal(size=4 * n).reshape((n, 4))
    X = np.concatenate((X, rng.normal(size=p_noise * n).reshape(n, p_noise)),
                       axis=1)
    Y = np.concatenate((Y, rng.normal(size=q_noise * n).reshape(n, q_noise)),
                       axis=1)

    pls = PLSCanonical(n_components=3)
    pls.fit(X, Y)

    expected_x_weights = np.array([
        [0.65803719, 0.19197924, 0.21769083],
        [0.7009113, 0.13303969, -0.15376699],
        [0.13528197, -0.68636408, 0.13856546],
        [0.16854574, -0.66788088, -0.12485304],
        [-0.03232333, -0.04189855, 0.40690153],
        [0.1148816, -0.09643158, 0.1613305],
        [0.04792138, -0.02384992, 0.17175319],
        [-0.06781, -0.01666137, -0.18556747],
        [-0.00266945, -0.00160224, 0.11893098],
        [-0.00849528, -0.07706095, 0.1570547],
        [-0.00949471, -0.02964127, 0.34657036],
        [-0.03572177, 0.0945091, 0.3414855],
        [0.05584937, -0.02028961, -0.57682568],
        [0.05744254, -0.01482333, -0.17431274],
    ])

    expected_x_loadings = np.array([
        [0.65649254, 0.1847647, 0.15270699],
        [0.67554234, 0.15237508, -0.09182247],
        [0.19219925, -0.67750975, 0.08673128],
        [0.2133631, -0.67034809, -0.08835483],
        [-0.03178912, -0.06668336, 0.43395268],
        [0.15684588, -0.13350241, 0.20578984],
        [0.03337736, -0.03807306, 0.09871553],
        [-0.06199844, 0.01559854, -0.1881785],
        [0.00406146, -0.00587025, 0.16413253],
        [-0.00374239, -0.05848466, 0.19140336],
        [0.00139214, -0.01033161, 0.32239136],
        [-0.05292828, 0.0953533, 0.31916881],
        [0.04031924, -0.01961045, -0.65174036],
        [0.06172484, -0.06597366, -0.1244497],
    ])

    expected_y_weights = np.array([
        [0.66101097, 0.18672553, 0.22826092],
        [0.69347861, 0.18463471, -0.23995597],
        [0.14462724, -0.66504085, 0.17082434],
        [0.22247955, -0.6932605, -0.09832993],
        [0.07035859, 0.00714283, 0.67810124],
        [0.07765351, -0.0105204, -0.44108074],
        [-0.00917056, 0.04322147, 0.10062478],
        [-0.01909512, 0.06182718, 0.28830475],
        [0.01756709, 0.04797666, 0.32225745],
    ])

    expected_y_loadings = np.array([
        [0.68568625, 0.1674376, 0.0969508],
        [0.68782064, 0.20375837, -0.1164448],
        [0.11712173, -0.68046903, 0.12001505],
        [0.17860457, -0.6798319, -0.05089681],
        [0.06265739, -0.0277703, 0.74729584],
        [0.0914178, 0.00403751, -0.5135078],
        [-0.02196918, -0.01377169, 0.09564505],
        [-0.03288952, 0.09039729, 0.31858973],
        [0.04287624, 0.05254676, 0.27836841],
    ])

    assert_array_almost_equal(np.abs(pls.x_loadings_),
                              np.abs(expected_x_loadings))
    assert_array_almost_equal(np.abs(pls.x_weights_),
                              np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_loadings_),
                              np.abs(expected_y_loadings))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_weights))

    x_loadings_sign_flip = np.sign(pls.x_loadings_ / expected_x_loadings)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    y_loadings_sign_flip = np.sign(pls.y_loadings_ / expected_y_loadings)
    assert_array_almost_equal(x_loadings_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_loadings_sign_flip, y_weights_sign_flip)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)

    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)
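The np.abs() comparisons and the sign_flip checks in these tests exist because PLS solutions are identified only up to a per-component sign. A minimal demonstration:

import numpy as np
from sklearn.cross_decomposition import PLSCanonical

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 4))
Y = rng.normal(size=(50, 4))

m1 = PLSCanonical(n_components=2).fit(X, Y)
m2 = PLSCanonical(n_components=2).fit(-X, Y)  # negating X can only flip signs

assert np.allclose(np.abs(m1.x_weights_), np.abs(m2.x_weights_))

# Loadings flip together with their weights, which is exactly what the
# sign-flip consistency assertions above verify
flip_w = np.sign(m1.x_weights_ / m2.x_weights_)
flip_l = np.sign(m1.x_loadings_ / m2.x_loadings_)
assert np.allclose(flip_w, flip_l)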
Code example #26
def plot_compare_cross_decomposition():
    # Dataset based latent variables model

    n = 500
    # 2 latents vars:
    l1 = np.random.normal(size=n)
    l2 = np.random.normal(size=n)

    latents = np.array([l1, l1, l2, l2]).T
    X = latents + np.random.normal(size=4 * n).reshape((n, 4))
    Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

    X_train = X[:n // 2]
    Y_train = Y[:n // 2]
    X_test = X[n // 2:]
    Y_test = Y[n // 2:]

    print("Corr(X)")
    print(np.round(np.corrcoef(X.T), 2))
    print("Corr(Y)")
    print(np.round(np.corrcoef(Y.T), 2))

    # #############################################################################
    # Canonical (symmetric) PLS

    # Transform data
    # ~~~~~~~~~~~~~~
    plsca = PLSCanonical(n_components=2)
    plsca.fit(X_train, Y_train)
    X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
    X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

    # Scatter plot of scores
    # ~~~~~~~~~~~~~~~~~~~~~~
    # 1) On diagonal plot X vs Y scores on each components
    plt.figure(figsize=(12, 8))
    plt.subplot(221)
    plt.scatter(X_train_r[:, 0],
                Y_train_r[:, 0],
                label="train",
                marker="o",
                s=25)
    plt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")

    plt.subplot(224)
    plt.scatter(X_train_r[:, 1],
                Y_train_r[:, 1],
                label="train",
                marker="o",
                s=25)
    plt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 2: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")

    # 2) Off diagonal plot components 1 vs 2 for X and Y
    plt.subplot(222)
    plt.scatter(X_train_r[:, 0],
                X_train_r[:, 1],
                label="train",
                marker="*",
                s=50)
    plt.scatter(X_test_r[:, 0], X_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("X comp. 1")
    plt.ylabel("X comp. 2")
    plt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())

    plt.subplot(223)
    plt.scatter(Y_train_r[:, 0],
                Y_train_r[:, 1],
                label="train",
                marker="*",
                s=50)
    plt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("Y comp. 1")
    plt.ylabel("Y comp. 2")
    plt.title('Y comp. 1 vs Y comp. 2 (test corr = %.2f)' %
              np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # #############################################################################
    # PLS regression, with multivariate response, a.k.a. PLS2

    n = 1000
    q = 3
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    B = np.array([[1, 2] + [0] * (p - 2)] * q).T
    # each Yj = 1*X1 + 2*X2 + noise
    Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

    pls2 = PLSRegression(n_components=3)
    pls2.fit(X, Y)
    print("True B (such that: Y = XB + Err)")
    print(B)
    # compare pls2.coef_ with B
    print("Estimated B")
    print(np.round(pls2.coef_, 1))
    pls2.predict(X)

    # PLS regression, with univariate response, a.k.a. PLS1

    n = 1000
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
    pls1 = PLSRegression(n_components=3)
    pls1.fit(X, y)
    # note that the number of components exceeds 1 (the dimension of y)
    print("Estimated betas")
    print(np.round(pls1.coef_, 1))

    # #############################################################################
    # CCA (PLS mode B with symmetric deflation)

    cca = CCA(n_components=2)
    cca.fit(X_train, Y_train)
    X_train_r, Y_train_r = cca.transform(X_train, Y_train)
    X_test_r, Y_test_r = cca.transform(X_test, Y_test)
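
    # A hedged extension (not in the original example): report per-component
    # correlations between X and Y scores for the CCA fit, mirroring the
    # figures drawn for PLSCanonical above.
    for comp in range(2):
        print("CCA comp. %d: train corr = %.2f, test corr = %.2f" % (
            comp + 1,
            np.corrcoef(X_train_r[:, comp], Y_train_r[:, comp])[0, 1],
            np.corrcoef(X_test_r[:, comp], Y_test_r[:, comp])[0, 1]))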
Code example #32
    plt.yticks(())
    plt.subplot(223)
    plt.plot(Y_train_r[:, 0], Y_train_r[:, 1], "*b", label="train")
    plt.plot(Y_test_r[:, 0], Y_test_r[:, 1], "*r", label="test")
    plt.xlabel("Y comp. 1")
    plt.ylabel("Y comp. 2")
    plt.title('Y comp. 1 vs Y comp. 2 (test corr = %.2f)' %
              numpy.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.savefig(output_file)
    plt.close()

# PLSCA
plsca = PLSCanonical(n_components=2)
plsca.fit(Xtrain, Ytrain)
# PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
#       scale=True, tol=1e-06)
X_train_r, Y_train_r = plsca.transform(Xtrain, Ytrain)
X_test_r, Y_test_r = plsca.transform(Xtest, Ytest)
do_plot(X_train_r, Y_train_r, X_test_r, Y_test_r,
        '%s/PLSCA_2comp_norm.pdf' % output_folder)

# CCA
# probably not necessary, but just in case the data was modified in some way
Ytrain = norm.loc[train, :]
Ytest = norm.loc[holdout, :]
Xtrain = numpy.array(X.loc[train, :])
Xtest = X.loc[holdout, :]
cca = CCA(n_components=2)
cca.fit(Xtrain, Ytrain)
# CCA(copy=True, max_iter=500, n_components=2, scale=True, tol=1e-06)
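
# A plausible continuation (an assumption, mirroring the PLSCA block above and
# reusing the same do_plot helper and output_folder):
X_train_r, Y_train_r = cca.transform(Xtrain, Ytrain)
X_test_r, Y_test_r = cca.transform(Xtest, Ytest)
do_plot(X_train_r, Y_train_r, X_test_r, Y_test_r,
        '%s/CCA_2comp_norm.pdf' % output_folder)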
Code example #33
plssvd = PLSSVD(n_components=2)
xt, yt = plssvd.fit_transform(dataTrain, Ytrain)
fig = plt.figure()
util.plotData(fig, xt, labelsTrain, classColors)

# Draw the first x-weight direction and a vector rotated 90 degrees from it
u = plssvd.x_weights_
plt.quiver(u[0, 0], u[1, 0], color='k', edgecolor='k', lw=1, scale=0.1,
           figure=fig)
plt.quiver(-u[1, 0], u[0, 0], color='k', edgecolor='k', lw=1, scale=0.4,
           figure=fig)

#%% PLS mode-A
lda = LDA()
nComponents = np.arange(1, nClasses + 1)
plsCanScores = np.zeros((2, len(nComponents)))
for i, n in enumerate(nComponents):
    plscan = PLSCanonical(n_components=n)
    plscan.fit(dataTrain, Ytrain)
    dataTrainT = plscan.transform(dataTrain)
    dataTestT = plscan.transform(dataTest)
    plsCanScores[:, i] = util.classify(dataTrainT, dataTestT,
                                       labelsTrain, labelsTest)
fig = plt.figure()
util.plotAccuracy(fig, nComponents, plsCanScores)
plt.title('PLS Canonical accuracy', figure=fig)

plscan = PLSCanonical(n_components=2)
xt, yt = plscan.fit_transform(dataTrain, Ytrain)
fig = plt.figure()
util.plotData(fig, xt, labelsTrain, classColors)

u = plscan.x_weights_
plt.quiver(u[0, 0], u[1, 0], color='k', edgecolor='k', lw=1, scale=0.1,
           figure=fig)
plt.quiver(-u[1, 0], u[0, 0], color='k', edgecolor='k', lw=1, scale=0.4,
           figure=fig)
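
# Sanity check (not in the original script): PLSCanonical's x-weights have
# orthonormal columns, as the tests earlier on this page assert.
print(np.round(np.dot(u.T, u), 6))  # approximately the identity matrix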
Code example #34
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw):
    print('training_lda_TD4_inter.........')
    start_time = time.time()

    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']

    print "----training " + kw[
        'feature_type'] + " inter, training by position O, testing by electrode shift "

    cv = 5
    results = []
    results.append(['Feat', 'Algorithm', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_' + kw['feature_type'] + '_inter'

    # sklearn.lda.LDA and sklearn.cross_validation are older scikit-learn APIs;
    # in current releases these live at
    # sklearn.discriminant_analysis.LinearDiscriminantAnalysis and
    # sklearn.model_selection.
    clf = sklearn.lda.LDA(solver='svd',
                          shrinkage=None,
                          priors=None,
                          n_components=None,
                          store_covariance=False,
                          tol=0.0001)

    data_num = trains_S0.shape[0] // action_num  # samples per action (integer)
    # print data_num

    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      trains_S0,
                                                      classes,
                                                      cv=cv)
    results.append(
        ['feat_TD4_cv_' + str(cv), 'lda', 'S0',
         scores.mean(),
         scores.std()])

    kf = KFold(data_num, n_folds=cv)

    for idx, channel_pos in enumerate(kw['pos_list']):

        X_test = trains_shift[:, idx * chan_len:idx * chan_len + chan_len]
        y_test = classes

        iteration = cv
        scores = np.zeros((iteration, ))
        cca_scores = np.zeros((iteration, ))

        iteration -= 1
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], int)
            test_idx_all = np.array([], int)
            for action_idx in range(action_num):
                # NOTE: multiplying fold indices by action_idx repeats index 0
                # when action_idx == 0; an offset such as
                # train_idx + action_idx * data_num may have been intended.
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * action_idx), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * action_idx), axis=0)

            # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all]
            X_train, y_train = trains_S0, classes

            X_train_shift, y_train_shift = X_test[train_idx_all], classes[
                train_idx_all]

            X_train_all = np.concatenate((X_train, X_train_shift), axis=0)
            y_train_all = np.concatenate((y_train, y_train_shift), axis=0)

            # sys.exit(0)  # debug leftover; if left active it aborts the run here
            score_inter = clf.fit(X_train_all,
                                  y_train_all).score(X_test, y_test)
            scores[iteration] = score_inter.mean()
            # print X_train.shape, y_train.shape

            if channel_pos != 'S0':

                # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
                plsca = PLSCanonical(n_components=14)
                # print(X_test.shape, X_train.shape)
                # NOTE: X_test[train_idx] has fewer rows than X_train, so this
                # fit raises on mismatched sample counts; the row selections
                # presumably need aligning.
                plsca.fit(X_test[train_idx], X_train)
                X_test_cca, X_train_cca = plsca.transform(X_test, X_train)
                cca_score = clf.fit(X_train_cca,
                                    y_train).score(X_test_cca, y_test)
                cca_scores[iteration] = cca_score.mean()

            iteration -= 1

        # print scores
        # print cca_scores
        # sys.exit(0)
        results.append(
            ['feat_TD4', 'lda', channel_pos,
             np.mean(scores),
             np.std(scores)])
        results.append([
            'feat_TD4', 'lda_cca', channel_pos,
            np.mean(cca_scores),
            np.std(cca_scores)
        ])

    log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2)
    print('----Log Fold:', log_fold, ', log_file:',
          log_file + '_' + channel_pos + '_' + str(kw['num']))
    print('----training TD4 time elapsed:', time.time() - start_time)
Code example #35
File: pls_audioset.py  Project: mridul/project-adv-ml
def pls_decomposition(videos, audios, n_components=256):
    plsca = PLSCanonical(n_components=n_components)
    plsca.fit(audios, videos)

    # transform arguments must follow the fit order (X=audios, Y=videos)
    audios_c, videos_c = plsca.transform(audios, videos)
    return videos_c, audios_c
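
# A hedged usage sketch with random stand-in embeddings; the shapes and the
# smaller n_components are assumptions for illustration, not taken from the
# project:
import numpy as np

rng = np.random.RandomState(0)
videos = rng.normal(size=(200, 64))  # hypothetical video embeddings
audios = rng.normal(size=(200, 64))  # hypothetical audio embeddings
videos_c, audios_c = pls_decomposition(videos, audios, n_components=32)
print(videos_c.shape, audios_c.shape)  # (200, 32) (200, 32)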
Code example #36
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):

    start_time = time.time()
    if 'log_fold' in kw:
        log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']
    cv = 3
    results = []
    results.append([
        'Feat', 'Algorithm', 'n_components', 'Channel_Pos', 'Accuracy', 'std'
    ])
    log_file = 'feat_' + kw['feature_type'] + '_intra'

    clf = sklearn.lda.LDA(solver='svd',
                          shrinkage=None,
                          priors=None,
                          n_components=None,
                          store_covariance=False,
                          tol=0.0001)

    data_num = trains.shape[0] // action_num

    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      trains,
                                                      classes,
                                                      cv=cv)
    results.append([
        'feat_TD4_cv_' + str(cv), 'lda', 'ALL', 0,
        scores.mean(),
        scores.std()
    ])

    # Within-group training strategy: 9 groups of data
    print('Within-group training.............')
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:, idx * chan_len:idx * chan_len + chan_len]

        scores = sklearn.cross_validation.cross_val_score(clf,
                                                          trains_intra,
                                                          classes,
                                                          cv=cv)
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0, channel_pos,
            scores.mean(),
            scores.std()
        ])

    # Center training strategy: train on S0, test on shifted positions
    print('Center training strategy.............')
    trains_intra_S0 = trains[:, 0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0,
            'train S0' + ' test ' + channel_pos,
            scores.mean(),
            scores.std()
        ])

    # Group training strategy (distinct from the within-group strategy), 5-fold
    print('Group training strategy.............')
    trains_intra_S0 = trains[:, 0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros((itera, ))
        # stds = np.zeros( (itera,) )
        itera -= 1
        trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], int)
            test_idx_all = np.array([], int)

            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * (action_idx + 1)), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * (action_idx + 1)), axis=0)

            X_train = np.concatenate(
                (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]),
                axis=0)
            y_train = np.concatenate(
                (classes[train_idx_all], classes[train_idx_all]), axis=0)

            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]

            # X_test = trains_shift
            # y_test = classes

            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()

            itera -= 1
        # print scores
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0, 'S0 + ' + channel_pos,
            np.mean(scores),
            np.std(scores)
        ])

    # CCA-based training strategy, 5-fold cross-validation
    print('CCA training strategy.............')
    trains_S0 = trains[:, 0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]  # candidate subspace dimensionalities
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:

        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros((itera, ))
            stds = np.zeros((itera, ))
            itera -= 1
            trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], int)
                test_idx_all = np.array([], int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate(
                        (train_idx_all, train_idx * (action_idx + 1)), axis=0)
                    test_idx_all = np.concatenate(
                        (test_idx_all, test_idx * (action_idx + 1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(
                    trains_shift, trains_S0)
                X_trains = np.concatenate(
                    (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate((classes, classes[train_idx_all]),
                                          axis=0)
                score = clf.fit(X_trains,
                                y_trains).score(trains_shift_cca[test_idx_all],
                                                classes[test_idx_all])

                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1

            results.append([
                'feat_TD4_cv_' + str(cv), 'lda_cca', n_components,
                'S0 + ' + channel_pos,
                np.mean(scores),
                np.std(scores)
            ])

    log_result(results,
               log_fold + '/' + log_file + '_action_1-' + str(action_num), 2)
    print('----Log Fold:', log_fold, ', log_file:',
          log_file + '_action_1-' + str(action_num))
    print('----training TD4 time elapsed:', time.time() - start_time)
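
# Stripped of the fold bookkeeping and logging, the CCA strategy above reduces
# to a few lines. A minimal self-contained sketch with made-up stand-ins for
# the baseline (S0) features, shifted-electrode features, and labels:
import numpy as np
from sklearn.cross_decomposition import PLSCanonical
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
baseline = rng.normal(size=(120, 16))                  # hypothetical S0 features
shifted = baseline + 0.1 * rng.normal(size=(120, 16))  # hypothetical shifted features
labels = rng.randint(0, 4, size=120)                   # hypothetical class labels

# Map both recording positions into a shared latent space, then classify there
plsca = PLSCanonical(n_components=8)
plsca.fit(shifted, baseline)
shifted_c, baseline_c = plsca.transform(shifted, baseline)
clf = LinearDiscriminantAnalysis(solver='svd')
print(clf.fit(baseline_c, labels).score(shifted_c, labels))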
Code example #38
x_train = x[:n // 2]
y_train = y[:n // 2]
x_test = x[n // 2:]
y_test = y[n // 2:]

print("corr(x)")
print(np.round(np.corrcoef(x.T), 2))

print("corr(y)")
print(np.round(np.corrcoef(y.T), 2))

#################################################################
# Canonical (symmetric) PLS
# transform the data
plsca = PLSCanonical(n_components=2)
plsca.fit(x_train, y_train)
x_train_r, y_train_r = plsca.transform(x_train, y_train)
x_test_r, y_test_r = plsca.transform(x_test, y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot x vs y scores on each component
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.plot(x_train_r[:, 0], y_train_r[:, 0], "ob", label="train")
plt.plot(x_test_r[:, 0], y_test_r[:, 0], "or", label="test")
plt.xlabel("x scores")
plt.ylabel("y scores")
plt.title('Comp. 1: x vs y (test corr = %.2f)' %
          np.corrcoef(x_test_r[:, 0], y_test_r[:, 0])[0, 1])
plt.xticks(())