def feature_action_sensitivity(feature_type='TD4'):
    """Compare feature distributions between the centre electrode position
    (S0) and each shifted position via canonical PLS.

    For every shifted position and subject, a PLSCanonical model is fitted
    between the shifted-position feature columns (X) and the centre-position
    columns (Y) on the first half of the samples; both halves are then
    projected into the shared latent space and plotted.

    Parameters
    ----------
    feature_type : str
        'TD4' (MAV/ZC/SSC/WL) or 'TD5' (TD4 + RMS).

    Raises
    ------
    ValueError
        If feature_type is neither 'TD4' nor 'TD5' (previously this fell
        through silently and crashed later with a NameError).
    """
    subjects = ['subject_' + str(i + 1) for i in range(1)]

    # Electrode positions: S0 is the centre; U/D/L/R are up/down/left/right shifts.
    channel_pos_list = ['S0',
                        'U1', 'U2', 'D1', 'D2', 'L1', 'L2', 'R1', 'R2']

    if feature_type == 'TD4':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL']
    elif feature_type == 'TD5':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL', 'RMS']
    else:
        raise ValueError('unknown feature_type: %r' % (feature_type,))
    feat_num = len(feature_list)

    groups = [i + 1 for i in range(4)]      # 4 channel groups per position
    group_num = len(groups)
    group_span = group_num * feat_num       # feature columns per position block

    train_dir = 'train4_250_100'

    plsca = PLSCanonical(n_components=2)
    for pos_idx, pos_name in enumerate(channel_pos_list[1:]):
        pos = pos_idx + 1                   # column-block index of the shifted position
        for subject in subjects:
            trains, classes = data_load.load_feature_dataset(train_dir, subject, feature_type)
            # Floor division keeps the slice index an int on both Python 2 and 3
            # (plain '/' yields a float in Python 3 and breaks the slicing below).
            m = trains.shape[0] // 2
            X_train = trains[:m, group_span * pos: group_span * (pos + 1)]
            Y_train = trains[:m, :group_span]
            X_test = trains[m:, group_span * pos: group_span * (pos + 1)]
            Y_test = trains[m:, :group_span]

            plsca.fit(X_train, Y_train)
            X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
            X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

            filename = subject + '_' + pos_name
            plot_plsc_figure_two(X_train_r, Y_train_r, X_test_r, Y_test_r, filename)
def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps):
    """Learn correlation matching (CM) on the training pair and apply it.

    Both modalities are z-scored with statistics estimated on the training
    split, a PLSCanonical model with ``n_comps`` components is fitted on the
    scaled training matrices, and every input matrix is projected into the
    shared correlation space.

    Parameters
    ----------
    I_tr : np.ndarray [shape=(n_tr, d_I)]
        image data matrix for training
    T_tr : np.ndarray [shape=(n_tr, d_T)]
        text data matrix for training
    I_te : np.ndarray [shape=(n_te, d_I)]
        image data matrix for testing
    T_te : np.ndarray [shape=(n_te, d_T)]
        text data matrix for testing
    n_comps : int > 0 [scalar]
        number of canonical components to use

    Returns
    -------
    I_tr_cca, T_tr_cca, I_te_cca, T_te_cca : np.ndarray
        The four input matrices represented in correlation space,
        shape (n, n_comps) each.
    """
    # z-score each modality; the test split reuses the training statistics
    scaler_img = StandardScaler()
    scaler_txt = StandardScaler()
    I_tr = scaler_img.fit_transform(I_tr)
    T_tr = scaler_txt.fit_transform(T_tr)
    I_te = scaler_img.transform(I_te)
    T_te = scaler_txt.transform(T_te)

    # scale=False because the data was standardised above
    cca = PLSCanonical(n_components=n_comps, scale=False)
    cca.fit(I_tr, T_tr)

    I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr)
    I_te_cca, T_te_cca = cca.transform(I_te, T_te)
    return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
Beispiel #3
0
def test_pls_canonical_basics():
    """Sanity checks for PLSCanonical fitted on the Linnerud dataset."""
    dataset = load_linnerud()
    X, Y = dataset.data, dataset.target

    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    # Weights and scores must all be orthogonal matrices.
    for mat in (pls.x_weights_, pls.y_weights_, pls._x_scores, pls._y_scores):
        assert_matrix_orthogonal(mat)

    # The centred/scaled data must factor as X = T P' and Y = U Q'.
    Xc, Yc, x_mean, y_mean, x_std, y_std = _center_scale_xy(
        X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(pls._x_scores, pls.x_loadings_.T))
    assert_array_almost_equal(Yc, np.dot(pls._y_scores, pls.y_loadings_.T))

    # Applying the learned rotations to the training data recovers the scores.
    assert_array_almost_equal(pls.transform(X), pls._x_scores)
    Xt, Yt = pls.transform(X, Y)
    assert_array_almost_equal(Xt, pls._x_scores)
    assert_array_almost_equal(Yt, pls._y_scores)

    # inverse_transform maps the scores back to the original space.
    assert_array_almost_equal(pls.inverse_transform(Xt), X)
    _, Y_back = pls.inverse_transform(Xt, Yt)
    assert_array_almost_equal(Y_back, Y)
Beispiel #4
0
def drawFaces(emb1, emb2, wordRanking, n, reduction="cut"):
    """
    Plot Chernoff faces for the n most (or, when n is negative, least)
    interesting words, one row per word with the two embeddings side by side.
    Based on: https://gist.github.com/aflaxman/4043086
    :param n: number of words to plot; if negative, the least interesting
    :param reduction: "cut", "svd" or "cca" — how to obtain 18 face features
    :return:
    """
    s1 = None
    s2 = None
    if reduction == "cut":
        # keep only the first 18 similarity columns
        s1 = emb1.getSimMatrix()[0:, 0:18]
        s2 = emb2.getSimMatrix()[0:, 0:18]
    elif reduction == "svd":
        # NOTE(review): `k` is not defined in this function — presumably a
        # module-level constant (likely 18); confirm before relying on this branch.
        s1 = TruncatedSVD(n_components=k).fit_transform(emb1.getSimMatrix())
        s2 = TruncatedSVD(n_components=k).fit_transform(emb2.getSimMatrix())
    elif reduction == "cca":
        # use the original embeddings, not the similarity matrix, for reduction
        cca = PLSCanonical(n_components=18)
        cca.fit(emb1.m, emb2.m)
        s1, s2 = cca.transform(emb1.m, emb2.m)

    # the filename tag keeps the sign of n, so compute it before flipping
    name = str(n) + "." + reduction
    if n < 0:  # plot the least interesting words instead
        n *= -1
        interesting = [wordRanking[::-1][i] for i in xrange(n)]
    else:
        interesting = [wordRanking[i] for i in xrange(n)]

    fig = plt.figure(figsize=(11, 11))
    for row, word in enumerate(interesting):
        j = emb1.d[word]
        # left column: embedding 1
        ax = fig.add_subplot(n, 2, 2 * row + 1, aspect='equal')
        mpl_cfaces.cface(ax, *s1[j])  # nice for similarity matrix *s1[j][:18]
        ax.axis([-1.2, 1.2, -1.2, 1.2])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(word)
        # right column: embedding 2
        ax2 = fig.add_subplot(n, 2, 2 * row + 2, aspect='equal')
        mpl_cfaces.cface(ax2, *s2[j])
        ax2.axis([-1.2, 1.2, -1.2, 1.2])
        ax2.set_xticks([])
        ax2.set_yticks([])
        ax2.set_title(word)
    plotname = "plots/"+NAME+".cface_s1s2_"+name+".png"
    fig.savefig(plotname)
    print("\tSaved Chernoff faces plot in '%s'" % (plotname))
Beispiel #5
0
class _PLSCanonicalImpl:
    """Thin adapter that delegates fit/transform/predict to the wrapped Op."""

    def __init__(self, **hyperparams):
        # remember the hyperparameters and build the underlying model
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; y is forwarded only when supplied."""
        args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*args)
        return self

    def transform(self, X):
        """Delegate to the wrapped model's transform."""
        return self._wrapped_model.transform(X)

    def predict(self, X):
        """Delegate to the wrapped model's predict."""
        return self._wrapped_model.predict(X)
Beispiel #6
0
    def getCCARanking(self, filter=None):
        """
        Rank words by how close their two embeddings are after projection
        into a common space by CCA (PLSCanonical).
        :param filter: optional word collection; restricts the returned pairs
        :return: list of (word, score) tuples sorted by score, descending
        """
        cca = PLSCanonical(n_components=self.n)
        cca.fit(self.emb1.m, self.emb2.m)
        m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)

        # score each word by the (inverted) distance between its two projections
        assert self.emb1.vocab_size == self.emb2.vocab_size
        distDict = dict()
        for i in xrange(self.emb1.vocab_size):
            v1 = m1transformed[i]
            v2 = m2transformed[i]
            w = self.emb1.rd[i]
            distDict[w] = 1-Similarity.euclidean(v1,v2)
        ranked = sorted(distDict.iteritems(), key=itemgetter(1), reverse=True)
        if filter is not None:
            # Filter the already-sorted list so the ranking order is preserved
            # (previously this re-read distDict.iteritems(), returning the
            # filtered pairs in arbitrary dict order instead of ranked order).
            ranked = [(w, s) for (w, s) in ranked if w in filter]
        return ranked
Beispiel #7
0
 def plotClustersCCA(self, filter=None):
     """
     Plot clusters in 2-dim CCA space: comparable across embeddings.
     :param filter: optional word collection; plot only these words
     :return:
     """
     # colour maps sized to the number of clusters (minimum 2 for get_cmap)
     if len(self.cluster1) <= 1:
         cmap1 = plt.get_cmap('jet', 2)
     else:
         cmap1 = plt.get_cmap('jet', len(self.cluster1))
     cmap1.set_under('gray')
     if len(self.cluster2) <= 1:
         cmap2 = plt.get_cmap('jet', 2)
     else:
         cmap2 = plt.get_cmap('jet', len(self.cluster2))
     cmap2.set_under('gray')
     # project both embeddings into a shared 2-dim space
     cca = PLSCanonical(n_components=2)
     cca.fit(self.emb1.m, self.emb2.m)
     m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)
     labels1 = [self.emb1.rd[i] for i in xrange(self.emb1.vocab_size)]
     colors1 = [self.word2cluster1[self.emb1.rd[i]] for i in xrange(self.emb1.vocab_size)]
     labels2 = [self.emb2.rd[i] for i in xrange(self.emb2.vocab_size)]
     colors2 = [self.word2cluster2[self.emb2.rd[i]] for i in xrange(self.emb2.vocab_size)]
     filteredIds = None
     if filter is not None:
         print("\tFiltering samples to plot")
         filteredIds = [self.emb1.d[w] for w in filter]  # ids of the filter words
     elif m1transformed.shape[0] > 100:  # sample indices to display, otherwise it's too messy
         # Fixed: previously this read `m1.transformed` (a typo) and drew a
         # single scalar instead of a sample of indices.
         filteredIds = np.random.randint(low=0, high=m1transformed.shape[0], size=100)
     if filteredIds is not None:
         m1transformed = m1transformed[filteredIds]
         m2transformed = m2transformed[filteredIds]
         # Index the labels by the same ids so each point keeps its own label
         # (previously labels were filtered by membership, which crashed when
         # filter was None and could mis-align labels with points otherwise).
         labels1 = [labels1[i] for i in filteredIds]
         labels2 = [labels2[i] for i in filteredIds]
     # NOTE(review): colors1/colors2 are not subset alongside the points —
     # confirm plotWithLabelsAndColors tolerates the length mismatch.
     plotWithLabelsAndColors(m1transformed, labels1, colors=colors1, cmap=cmap1, filename="plots/"+NAME+".cca1.png", dimRed="CCA")
     plotWithLabelsAndColors(m2transformed, labels2, colors=colors2, cmap=cmap2, filename="plots/"+NAME+".cca2.png", dimRed="CCA")
    plt.xlim(1,np.amax(nComponents))
    plt.title('PLS SVD accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.legend (['LR','LDA','GNB','Linear SVM','rbf SVM'],loc='lower right')
    plt.grid(True)

if (0):  # disabled experiment block — flip to 1/True to run
    #%% PLS Cannonical
    # Accuracy of five classifiers for each number of PLS components.
    nComponents = np.arange(1,nClasses+1)
    # NOTE(review): np.alen is deprecated in modern NumPy — confirm version.
    plsCanScores = np.zeros((5,np.alen(nComponents)))
    for i,n in enumerate(nComponents):
        plscan = PLSCanonical(n_components=n)
        plscan.fit(Xtrain,Ytrain)
        XtrainT = plscan.transform(Xtrain)
        XtestT = plscan.transform(Xtest)
        plsCanScores[:,i] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)

    
    # 2-component projection of the training data for visualisation.
    plscan = PLSCanonical(n_components=2)
    plscan.fit(Xtrain,Ytrain)
    xt = plscan.transform(Xtrain)
    fig = plt.figure()
    util.plotData(fig,xt,labelsTrain,classColors)
    plt.title('First 2 components of projected data')
    

    #%% Plot accuracies for PLSSVD 
    plt.figure()
    for i in range (5):
Beispiel #9
0
        vec_c.append(i)

    if j < len_train:
        l_p.append(vec_p)
        l_c.append(vec_c)
    else:
        l_p_t.append(vec_p)
        l_c_t.append(vec_c)
    j += 1

# Stack the feature lists accumulated above into arrays
# (l_p/l_c are the training pairs, l_p_t/l_c_t the test pairs).
sorted_p = np.asarray(l_p)
sorted_c = np.asarray(l_c)  #Convert the input to an array

# Fit canonical PLS on the training pair and project both matrices
# into the shared latent space.
plc = PLSCanonical()
plc.fit_transform(sorted_c, sorted_p)
sorted_c, sorted_p = plc.transform(sorted_c, sorted_p)

sorted_c_test = np.asarray(l_c_t)
sorted_p_test = np.asarray(l_p_t)
# Project the test pair with the model fitted on the training data.
sorted_c_test, sorted_p_test = plc.transform(sorted_c_test, sorted_p_test)

# Regress the p-features from the c-features in the latent space.
plr = PLSRegression()
plr.fit(sorted_c, sorted_p)
params = plr.get_params()
plr.set_params(**params)  # NOTE(review): get/set of identical params is a no-op
y_score = plr.predict(sorted_c_test)
sim_count = 0

print("Test Similarity: ")
# cosine similarity between each prediction and its ground truth
for i in range(len(y_score)):
    result_sim = 1 - spatial.distance.cosine(y_score[i], sorted_p_test[i])
Beispiel #10
0
    # set negative connectivities to 0
    edge_data = np.apply_along_axis(
        lambda x: [0 if element < 0 else element for element in x], 1,
        edge_data)

# re-split data (3 ways) for CCA
X1_train = edge_data[:140, :]
X2_train = edge_data[140:280, :]
X2_remain = edge_data[280:, :]
#cca = CCA(n_components =2)
#cca.fit(X1_train, X2_train)
# Canonical PLS between the two training splits, 100 latent components.
cca = PLSCanonical(n_components=100)
cca.fit(X1_train, X2_train)
block_1_transformed, block_2_transformed = cca.transform(X1_train,
                                                         X2_train,
                                                         copy=False)
# Project the remaining block with the learned Y-side rotation.
# NOTE(review): this skips the mean-centring that transform() applies —
# confirm this is intended.
block_3_transformed = np.dot(X2_remain, cca.y_rotations_)

# Recombine the three blocks in their original row order.
edge_data_transformed = np.vstack(
    (block_1_transformed, block_2_transformed, block_3_transformed))
# initialise the classifier

clf = svm.SVC(kernel='precomputed')

# optional shuffle: apply the same permutation to labels and data rows
perm = np.random.permutation(n_subjects)
#print perm
#print n_subjects
labels = labels[perm]
edge_data_transformed = edge_data_transformed[perm, :]
Beispiel #11
0
    plt.xlim(1, np.amax(nComponents))
    plt.title('PLS SVD accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.legend(['LR', 'LDA', 'GNB', 'Linear SVM', 'rbf SVM'],
               loc='lower right')
    plt.grid(True)

if (0):  # disabled experiment block — flip to 1/True to run
    #%% PLS Cannonical
    # Accuracy of five classifiers for each number of PLS components.
    nComponents = np.arange(1, nClasses + 1)
    # NOTE(review): np.alen is deprecated in modern NumPy — confirm version.
    plsCanScores = np.zeros((5, np.alen(nComponents)))
    for i, n in enumerate(nComponents):
        plscan = PLSCanonical(n_components=n)
        plscan.fit(Xtrain, Ytrain)
        XtrainT = plscan.transform(Xtrain)
        XtestT = plscan.transform(Xtest)
        plsCanScores[:, i] = util.classify(XtrainT, XtestT, labelsTrain,
                                           labelsTest)

    # 2-component projection of the training data for visualisation.
    plscan = PLSCanonical(n_components=2)
    plscan.fit(Xtrain, Ytrain)
    xt = plscan.transform(Xtrain)
    fig = plt.figure()
    util.plotData(fig, xt, labelsTrain, classColors)
    plt.title('First 2 components of projected data')

    #%% Plot accuracies for PLSSVD
    plt.figure()
    for i in range(5):
        plt.plot(nComponents, plsCanScores[i, :], lw=3)
Beispiel #12
0
def pls_decomposition(videos, audios, n_components=256):
    """Project paired video/audio features into a shared PLS latent space.

    Fits PLSCanonical with audio as X and video as Y, then returns both
    matrices transformed into the common space (videos first, audios second).
    """
    model = PLSCanonical(n_components=n_components)
    model.fit(audios, videos)
    videos_c, audios_c = model.transform(videos, audios)
    return videos_c, audios_c
Beispiel #13
0
        for e in temp1[:]:
            temp4.append(e)
        for e in temp2[:]:
            temp4.append(e)
        if len(temp4) == 600 and len(temp3) == 300:
            x_n.append(temp4)
            y_n.append(temp3)

# Convert the accumulated lists to float arrays (x/y are the main pair,
# x_n/y_n the '_n' evaluation pair built above).
npx = np.asarray(x, dtype=np.float64)
npy = np.asarray(y, dtype=np.float64)

npxn = np.asarray(x_n, dtype=np.float64)
npyn = np.asarray(y_n, dtype=np.float64)
# Project both pairs into a shared 2-dim canonical space.
cca = PLSCanonical(n_components=2)
cca.fit_transform(npx, npy)
npx, npy = cca.transform(npx, npy)
npxn, npyn = cca.transform(npxn, npyn)

# NOTE(review): `pls` is defined elsewhere in this file — confirm in scope.
pls.fit(npx, npy)
params = pls.get_params(deep=True)
print(params)
pls.set_params(**params)  # round-trips the parameters unchanged (no-op)

y_score = pls.predict(npxn)

sim_count = 0
tol = 0.1

for index in range(len(y_score)):
    # NOTE(review): sub_result is loop-invariant (recomputed identically each
    # iteration) and unused afterwards — looks like leftover debugging.
    sub_result = np.subtract(y_score, npyn)
    result = 1 - spatial.distance.cosine(y_score[index], npyn[index])
Beispiel #14
0
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw):
    """Inter-position training: fit LDA on the centre position (S0) and test
    on each shifted electrode position, with and without CCA alignment.

    Required kw keys: 'log_fold', 'chan_len', 'action_num', 'feature_type',
    'pos_list', 'num'. Mean/std accuracies are written via log_result.

    NOTE(review): the fold loop below contains a stray sys.exit(0), so
    everything after it never runs — this function appears to be abandoned
    mid-debugging; see the inline notes before re-enabling it.
    """
    print 'training_lda_TD4_inter.........'
    start_time = time.time()

    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']      # feature columns per electrode position
    action_num = kw['action_num']  # number of action classes

    print "----training " + kw[
        'feature_type'] + " inter, training by position O, testing by electrode shift "

    cv = 5
    results = []
    results.append(['Feat', 'Algorithm', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_' + kw['feature_type'] + '_inter'

    clf = sklearn.lda.LDA(solver='svd',
                          shrinkage=None,
                          priors=None,
                          n_components=None,
                          store_covariance=False,
                          tol=0.0001)

    # samples per action (Python 2 integer division)
    data_num = trains_S0.shape[0] / action_num
    # print data_num

    # Baseline: cross-validated accuracy on the centre position alone.
    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      trains_S0,
                                                      classes,
                                                      cv=cv)
    results.append(
        ['feat_TD4_cv_' + str(cv), 'lda', 'S0',
         scores.mean(),
         scores.std()])

    kf = KFold(data_num, n_folds=cv)

    for idx, channel_pos in enumerate(kw['pos_list']):

        # columns of this shifted position
        X_test = trains_shift[:, idx * chan_len:idx * chan_len + chan_len]
        y_test = classes

        iteration = cv
        scores = np.zeros((iteration, ))
        cca_scores = np.zeros((iteration, ))

        # scores are filled from the last slot downwards
        iteration -= 1
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            # NOTE(review): multiplying by action_idx gives all-zero indices
            # for action_idx == 0; the intra variant uses (action_idx + 1) —
            # confirm which offset is intended.
            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * action_idx), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * action_idx), axis=0)

            # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all]
            X_train, y_train = trains_S0, classes

            X_train_shift, y_train_shift = X_test[train_idx_all], classes[
                train_idx_all]

            X_train_all = np.concatenate((X_train, X_train_shift), axis=0)
            y_train_all = np.concatenate((y_train, y_train_shift), axis=0)

            # NOTE(review): debugging leftover — aborts the whole program on
            # the first fold; all code below in this loop is unreachable.
            sys.exit(0)
            score_inter = clf.fit(X_train_all,
                                  y_train_all).score(X_test, y_test)
            scores[iteration] = score_inter.mean()
            # print X_train.shape, y_train.shape

            if channel_pos != 'S0':

                # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
                plsca = PLSCanonical(n_components=14)
                # print X_test.shape, X_train.shape
                # sys.exit(0)
                # NOTE(review): X_test[train_idx] has fewer rows than X_train
                # (the full trains_S0) — this fit would raise if ever reached.
                plsca.fit(X_test[train_idx], X_train)
                X_test_cca, X_train_cca = plsca.transform(X_test, X_train)
                cca_score = clf.fit(X_train_cca,
                                    y_train).score(X_test_cca, y_test)
                cca_scores[iteration] = cca_score.mean()

            iteration -= 1

        # print scores
        # print cca_scores
        # sys.exit(0)
        results.append(
            ['feat_TD4', 'lda', channel_pos,
             np.mean(scores),
             np.std(scores)])
        results.append([
            'feat_TD4', 'lda_cca', channel_pos,
            np.mean(cca_scores),
            np.std(cca_scores)
        ])

    log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_' + channel_pos + '_' + str(
        kw['num'])
    print '----training TD4 time elapsed:', time.time() - start_time
# Second half of the rows serves as the held-out split
# (X, Y and n are defined earlier in this script).
X_test = X[n // 2:]
Y_test = Y[n // 2:]

# Feature-feature correlation matrices of both views.
print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

# #############################################################################
# Canonical (symmetric) PLS

# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
plt.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
plt.xlabel("x scores")
plt.ylabel("y scores")
plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
          np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
plt.xticks(())
plt.yticks(())
Beispiel #16
0
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import PLSCanonical
from sklearn.neighbors import KNeighborsClassifier
import math
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

# Load the digits dataset: 64-dim pixel features and 0-9 labels.
dataSet = datasets.load_digits()
data = dataSet["data"]
target = dataSet["target"]

# Project features and labels into a shared 2-component PLS space.
plsca = PLSCanonical(n_components=2)
plsca.fit(data, target)

X_train_r, Y_train_r = plsca.transform(data, target)

# k = sqrt(n_samples): a common rule of thumb for choosing KNN's k.
knn = math.sqrt(len(X_train_r))
knn = KNeighborsClassifier(n_neighbors=int(knn))

# PLS returns continuous scores; cast back to ints to use them as labels.
Y_train_r = [int(Y_train_r[i]) for i in range(0, len(Y_train_r))]

# Train and report training accuracy (evaluated on the same data).
k = knn.fit(X_train_r, Y_train_r)
print(k.score(X_train_r, Y_train_r))
knn = KNeighborsClassifier(n_neighbors=4)

sfs = SFS(knn,
          k_features=3,
          forward=True,
          floating=False,
          verbose=2,
Beispiel #17
0
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):
    """Intra-subject LDA training over electrode positions.

    Evaluates four strategies and logs mean/std accuracy for each:
    per-position cross-validation, centre-only (train S0 / test shifted),
    combined S0+shift training, and CCA-aligned training for a range of
    subspace dimensionalities.

    Required kw keys: 'log_fold', 'chan_len', 'action_num', 'feature_type',
    'pos_list'.
    """
    start_time = time.time()
    # NOTE(review): if 'log_fold' is missing, log_fold is never bound and
    # new_fold(log_fold) below raises NameError — confirm callers always pass it.
    if (kw.has_key('log_fold')):
        log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']      # feature columns per electrode position
    action_num = kw['action_num']  # number of action classes
    cv = 3
    results = []
    results.append([
        'Feat', 'Algorithm', 'n_components', 'Channel_Pos', 'Accuracy', 'std'
    ])
    log_file = 'feat_' + kw['feature_type'] + '_intra'

    clf = sklearn.lda.LDA(solver='svd',
                          shrinkage=None,
                          priors=None,
                          n_components=None,
                          store_covariance=False,
                          tol=0.0001)

    # samples per action (Python 2 integer division)
    data_num = trains.shape[0] / action_num

    # Baseline: cross-validated accuracy on all positions concatenated.
    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      trains,
                                                      classes,
                                                      cv=cv)
    results.append([
        'feat_TD4_cv_' + str(cv), 'lda', 'ALL', 0,
        scores.mean(),
        scores.std()
    ])

    # Intra-group training strategy: cross-validate within each of the 9 positions.
    print '组内训练.............'
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:, idx * chan_len:idx * chan_len + chan_len]

        scores = sklearn.cross_validation.cross_val_score(clf,
                                                          trains_intra,
                                                          classes,
                                                          cv=cv)
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0, channel_pos,
            scores.mean(),
            scores.std()
        ])

    # Centre-training strategy: train on S0, test on each shifted position.
    print '中心训练策略.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        # NOTE(review): score() returns a scalar here; .mean()/.std() below
        # rely on it being a numpy scalar — confirm.
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0,
            'train S0' + ' test ' + channel_pos,
            scores.mean(),
            scores.std()
        ])

    # Group-training strategy (distinct from intra-group training), 5-fold:
    # train on S0 plus a fold of the shifted position, test on the held-out fold.
    print '组训练策略.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros((itera, ))
        # stds = np.zeros( (itera,) )
        # scores are filled from the last slot downwards
        itera -= 1
        trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)

            # replicate the per-action fold indices across all actions
            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * (action_idx + 1)), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * (action_idx + 1)), axis=0)

            # training set: centre-position rows plus shifted-position rows
            X_train = np.concatenate(
                (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]),
                axis=0)
            y_train = np.concatenate(
                (classes[train_idx_all], classes[train_idx_all]), axis=0)

            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]

            # X_test = trains_shift
            # y_test = classes

            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()

            itera -= 1
        # print scores
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0, 'S0 + ' + channel_pos,
            np.mean(scores),
            np.std(scores)
        ])

    # CCA-based training strategy with 5-fold cross-validation: align each
    # shifted position to S0 in a shared subspace before classification.
    print 'CCA训练策略.............'
    trains_S0 = trains[:, 0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]  # subspace dimensionalities
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:

        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros((itera, ))
            stds = np.zeros((itera, ))
            itera -= 1
            trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], np.int)
                test_idx_all = np.array([], np.int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate(
                        (train_idx_all, train_idx * (action_idx + 1)), axis=0)
                    test_idx_all = np.concatenate(
                        (test_idx_all, test_idx * (action_idx + 1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                # NOTE(review): the CCA is fitted on ALL rows (including the
                # test fold) — potential leakage; confirm intended.
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(
                    trains_shift, trains_S0)
                X_trains = np.concatenate(
                    (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate((classes, classes[train_idx_all]),
                                          axis=0)
                score = clf.fit(X_trains,
                                y_trains).score(trains_shift_cca[test_idx_all],
                                                classes[test_idx_all])

                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1

            results.append([
                'feat_TD4_cv_' + str(cv), 'lda_cca', n_components,
                'S0 + ' + channel_pos,
                np.mean(scores),
                np.std(scores)
            ])

    log_result(results,
               log_fold + '/' + log_file + '_action_1-' + str(action_num), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_action_1-' + str(
        action_num)
    print '----training TD4 time elapsed:', time.time() - start_time
# Finish the preceding figure (axes/legend set up earlier in this script).
plt.gca().set_aspect('equal', adjustable='box')
plt.legend(['Cell volume', 'Age', 'Both'])

#NB: Strong colinearity between Age and Volume

# Transition rate prediction using PLS
X = dfc_g1[['vol_sm', 'Age', 'gr_sm']]  # Design matrix
y = dfc_g1['G1S_logistic']  # Response var
# Drop NaN rows
I = np.isnan(dfc_g1['gr_sm'])
X = X.loc[~I].copy()
y = y[~I]
# Fit canonical PLS on the z-scored design matrix against the response.
pls_model = PLSCanonical()
pls_model.fit(scale(X), y)

# Project design matrix and response into the PLS latent space.
X_c, y_c = pls_model.transform(scale(X), y)

# Multiple linearregression on birth size and growth rate
df['bvol'] = df['Birth volume']
df['exp_gr'] = df['Exponential growth rate']
df['g1_len'] = df['G1 length']
model = smf.ols('g1_len ~ exp_gr + bvol', data=df).fit()
model.summary()
print model.pvalues

# Delete S/G2 after first time point
g1s_marked = []
for c in collated_filtered:
    c = c[c['Phase'] != 'Daughter G1'].copy()
    g1 = c[c['Phase'] == 'G1']
    # NOTE(review): assigning into the `g1` slice may not write back to `c`
    # (pandas SettingWithCopy) — confirm intended.
    g1['G1S_mark'] = 0
def plot_compare_cross_decomposition():
    """Compare cross-decomposition methods (PLSCanonical, PLSRegression, CCA)
    on synthetic latent-variable data, plotting the canonical scores and
    printing the recovered regression coefficients."""
    # Dataset based latent variables model

    n = 500
    # 2 latents vars:
    l1 = np.random.normal(size=n)
    l2 = np.random.normal(size=n)

    # both views share the same two latent variables, plus noise
    latents = np.array([l1, l1, l2, l2]).T
    X = latents + np.random.normal(size=4 * n).reshape((n, 4))
    Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

    # first half trains, second half tests
    X_train = X[:n // 2]
    Y_train = Y[:n // 2]
    X_test = X[n // 2:]
    Y_test = Y[n // 2:]

    print("Corr(X)")
    print(np.round(np.corrcoef(X.T), 2))
    print("Corr(Y)")
    print(np.round(np.corrcoef(Y.T), 2))

    # #############################################################################
    # Canonical (symmetric) PLS

    # Transform data
    # ~~~~~~~~~~~~~~
    plsca = PLSCanonical(n_components=2)
    plsca.fit(X_train, Y_train)
    X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
    X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

    # Scatter plot of scores
    # ~~~~~~~~~~~~~~~~~~~~~~
    # 1) On diagonal plot X vs Y scores on each components
    plt.figure(figsize=(12, 8))
    plt.subplot(221)
    plt.scatter(X_train_r[:, 0],
                Y_train_r[:, 0],
                label="train",
                marker="o",
                s=25)
    plt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")

    plt.subplot(224)
    plt.scatter(X_train_r[:, 1],
                Y_train_r[:, 1],
                label="train",
                marker="o",
                s=25)
    plt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 2: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")

    # 2) Off diagonal plot components 1 vs 2 for X and Y
    plt.subplot(222)
    plt.scatter(X_train_r[:, 0],
                X_train_r[:, 1],
                label="train",
                marker="*",
                s=50)
    plt.scatter(X_test_r[:, 0], X_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("X comp. 1")
    plt.ylabel("X comp. 2")
    plt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())

    plt.subplot(223)
    plt.scatter(Y_train_r[:, 0],
                Y_train_r[:, 1],
                label="train",
                marker="*",
                s=50)
    plt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("Y comp. 1")
    plt.ylabel("Y comp. 2")
    plt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)' %
              np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # #############################################################################
    # PLS regression, with multivariate response, a.k.a. PLS2

    n = 1000
    q = 3
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    B = np.array([[1, 2] + [0] * (p - 2)] * q).T
    # each Yj = 1*X1 + 2*X2 + noize
    Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

    pls2 = PLSRegression(n_components=3)
    pls2.fit(X, Y)
    print("True B (such that: Y = XB + Err)")
    print(B)
    # compare pls2.coef_ with B
    print("Estimated B")
    print(np.round(pls2.coef_, 1))
    pls2.predict(X)

    # PLS regression, with univariate response, a.k.a. PLS1

    n = 1000
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
    pls1 = PLSRegression(n_components=3)
    pls1.fit(X, y)
    # note that the number of components exceeds 1 (the dimension of y)
    print("Estimated betas")
    print(np.round(pls1.coef_, 1))

    # #############################################################################
    # CCA (PLS mode B with symmetric deflation)

    # NOTE(review): X_train/Y_train here still refer to the 4-column split
    # created at the top of the function, not the PLS1/PLS2 data above.
    cca = CCA(n_components=2)
    cca.fit(X_train, Y_train)
    X_train_r, Y_train_r = cca.transform(X_train, Y_train)
    X_test_r, Y_test_r = cca.transform(X_test, Y_test)
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw):
    print 'training_lda_TD4_inter.........'
    start_time = time.time()

    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num  = kw['action_num']

    print "----training "+kw['feature_type']+" inter, training by position O, testing by electrode shift "

    cv = 5
    results = []
    results.append(['Feat', 'Algorithm','Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_'+kw['feature_type']+'_inter'

    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)

    data_num = trains_S0.shape[0]/action_num
    # print data_num
    
    scores = sklearn.cross_validation.cross_val_score(
        clf, trains_S0, classes, cv=cv)
    results.append(['feat_TD4_cv_'+str(cv), 'lda', 'S0',
                    scores.mean(), scores.std()])
    
    kf = KFold(data_num, n_folds=cv)
    
    for idx, channel_pos in enumerate(kw['pos_list']):

        X_test = trains_shift[:,idx*chan_len:idx*chan_len+chan_len]
        y_test = classes

        iteration = cv
        scores = np.zeros((iteration,))
        cca_scores = np.zeros((iteration,))
        
        
        iteration -= 1
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            for action_idx in range(action_num):
                train_idx_all = np.concatenate( (train_idx_all, train_idx*action_idx), axis=0)
                test_idx_all = np.concatenate( (test_idx_all, test_idx*action_idx), axis=0)

            # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all]
            X_train, y_train = trains_S0, classes

            X_train_shift, y_train_shift = X_test[train_idx_all], classes[train_idx_all]
            
            X_train_all = np.concatenate( (X_train, X_train_shift), axis=0)
            y_train_all = np.concatenate( (y_train, y_train_shift), axis=0)
            
            sys.exit(0)
            score_inter = clf.fit(X_train_all, y_train_all).score(X_test, y_test)
            scores[iteration] = score_inter.mean()
            # print X_train.shape, y_train.shape
            

            if channel_pos != 'S0':

                # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
                plsca = PLSCanonical(n_components=14)
                # print X_test.shape, X_train.shape
                # sys.exit(0)
                plsca.fit(X_test[train_idx], X_train)
                X_test_cca, X_train_cca = plsca.transform(X_test, X_train)
                cca_score = clf.fit(X_train_cca, y_train).score(X_test_cca, y_test)
                cca_scores[iteration] = cca_score.mean()

            iteration -= 1

        # print scores
        # print cca_scores
        # sys.exit(0)
        results.append(['feat_TD4', 'lda', 
                        channel_pos, np.mean(scores), np.std(scores)])
        results.append(['feat_TD4', 'lda_cca', 
                        channel_pos, np.mean(cca_scores), np.std(cca_scores)])

            
    
    
    log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_' + channel_pos + '_' + str(kw['num'])
    print '----training TD4 time elapsed:', time.time() - start_time

    

    # mean_shift = 0
    # std_shift = 0
    # for i in range(2, 10):
    #     mean_shift += results[i][4]
    #     std_shift += results[i][5]
    # mean_shift /= 9
    # std_shift /= 9

    # results.append(['feat_TD4','lda(svd;tol=0.0001)', 'Shift_means', '1.0', mean_shift, std_shift])

    # mean_all = 0
    # std_all = 0
    # for i in range(1, 10):
    #     mean_all += results[i][4]
    #     std_all += results[i][5]
    # mean_all /= 9
    # std_all /= 9
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):

    start_time = time.time()
    if(kw.has_key('log_fold')):
        log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']
    cv = 3
    results = []
    results.append(
        ['Feat', 'Algorithm','n_components', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_'+kw['feature_type']+'_intra'

    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)

    data_num = trains.shape[0]/action_num

    scores = sklearn.cross_validation.cross_val_score(clf, trains, classes, cv=cv)
    results.append(['feat_TD4_cv_'+str(cv), 'lda', 'ALL', 0, scores.mean(), scores.std()])
    
    # 组内训练策略 9组数据
    print '组内训练.............'
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:,idx*chan_len: idx*chan_len+chan_len]

        scores = sklearn.cross_validation.cross_val_score(
            clf, trains_intra, classes, cv=cv)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, channel_pos, scores.mean(), scores.std()])


    # 中心训练策略
    print '中心训练策略.............'
    trains_intra_S0 = trains[:,0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, 'train S0' + ' test ' + channel_pos, scores.mean(), scores.std()])
    
    # 组训练策略(不同于组内训练策略) 5-fold
    print '组训练策略.............'
    trains_intra_S0 = trains[:,0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros( (itera,) )
        # stds = np.zeros( (itera,) )
        itera -= 1
        trains_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)

            for action_idx in range(action_num):
                train_idx_all = np.concatenate( (train_idx_all, train_idx*(action_idx+1)), axis=0)
                test_idx_all = np.concatenate( (test_idx_all, test_idx*(action_idx+1)), axis=0)

            X_train = np.concatenate( (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]), axis=0)
            y_train = np.concatenate( (classes[train_idx_all], classes[train_idx_all]), axis=0)

            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]

            # X_test = trains_shift
            # y_test = classes

            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()
            
            itera -= 1
        # print scores
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, 'S0 + '+channel_pos, np.mean(scores), np.std(scores)])
    
    # 基于CCA的训练策略 5-fold 交叉验证
    print 'CCA训练策略.............'
    trains_S0 = trains[:,0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]              # 子空间维数
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:
        
        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros( (itera,) )
            stds = np.zeros( (itera,) )
            itera -= 1
            trains_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], np.int)
                test_idx_all = np.array([], np.int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate( (train_idx_all, train_idx*(action_idx+1)), axis=0)
                    test_idx_all = np.concatenate( (test_idx_all, test_idx*(action_idx+1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(trains_shift, trains_S0)
                X_trains = np.concatenate( (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate( (classes, classes[train_idx_all]), axis=0)
                score = clf.fit(X_trains, y_trains).score(trains_shift_cca[test_idx_all], classes[test_idx_all])

                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1
            
            results.append(['feat_TD4_cv_'+str(cv), 'lda_cca', n_components, 'S0 + '+channel_pos, np.mean(scores), np.std(scores)])
    
    log_result(results, log_fold + '/' + log_file + '_action_1-'+str(action_num), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_action_1-'+str(action_num)
    print '----training TD4 time elapsed:', time.time() - start_time
# Beispiel #22 (scraped example separator; original vote count: 0)
# NOTE(review): flat demo script scraped from an examples site; plssvd,
# dataTrain, dataTest, Ytrain, labelsTrain, labelsTest, classColors,
# nClasses, util and LDA are defined in the part that was cut off above.

# Project the training data with PLS-SVD and plot the first two components.
xt, yt = plssvd.fit_transform(dataTrain, Ytrain)
fig = plt.figure()
util.plotData(fig, xt, labelsTrain, classColors)

# Draw the first X-weight vector and its perpendicular as arrows.
u = plssvd.x_weights_
plt.quiver(u[0, 0], u[1, 0], color='k', edgecolor='k', lw=1, scale=0.1, figure=fig)
plt.quiver(-u[1, 0], u[0, 0], color='k', edgecolor='k', lw=1, scale=0.4, figure=fig)

#%% PLS mode-A
# Sweep the number of PLS Canonical components and record accuracy.
lda = LDA()
nComponents = np.arange(1, nClasses + 1)
# BUGFIX: np.alen() was deprecated in NumPy 1.18 and removed in 1.23;
# the builtin len() is the documented replacement.
plsCanScores = np.zeros((2, len(nComponents)))
for i, n in enumerate(nComponents):
    plscan = PLSCanonical(n_components=n)
    plscan.fit(dataTrain, Ytrain)
    dataTrainT = plscan.transform(dataTrain)
    dataTestT = plscan.transform(dataTest)
    plsCanScores[:, i] = util.classify(dataTrainT, dataTestT, labelsTrain, labelsTest)
fig = plt.figure()
util.plotAccuracy(fig, nComponents, plsCanScores)
plt.title('PLS Canonical accuracy', figure=fig)

# Re-fit with a fixed 2-component model for a 2-D scatter of the scores.
plscan = PLSCanonical(n_components=2)
xt, yt = plscan.fit_transform(dataTrain, Ytrain)
fig = plt.figure()
util.plotData(fig, xt, labelsTrain, classColors)

u = plscan.x_weights_
plt.quiver(u[0, 0], u[1, 0], color='k', edgecolor='k', lw=1, scale=0.1, figure=fig)
plt.quiver(-u[1, 0], u[0, 0], color='k', edgecolor='k', lw=1, scale=0.4, figure=fig)
# Beispiel #23 (scraped example separator; original vote count: 0)
# NOTE(review): fragment of the scikit-learn cross-decomposition example;
# X, Y, n, X_train and Y_train are defined in the part that was cut off
# above this chunk.
X_test = X[n // 2:]
Y_test = Y[n // 2:]

# Show the within-block correlation structure of the generated data.
print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

# #############################################################################
# Canonical (symmetric) PLS

# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.scatter(X_train_r[:, 0],
            Y_train_r[:, 0],
            label="train",
            marker="o",
            c="b",
            s=25)
plt.scatter(X_test_r[:, 0],
            Y_test_r[:, 0],
# Beispiel #24 (scraped example separator; original vote count: 0)
# NOTE(review): fragment of an older (Python 2) variant of the scikit-learn
# cross-decomposition example; x, y, n and x_train are defined in the part
# that was cut off above.  `n / 2` as a slice index relies on Python 2
# integer division -- it would raise TypeError under Python 3.
y_train = y[:n / 2]
x_test = x[n / 2:]
y_test = y[n / 2:]

# Show the within-block correlation structure of the generated data.
print("corr(x)")
print(np.round(np.corrcoef(x.T), 2))

print("corr(y)")
print(np.round(np.corrcoef(y.T), 2))

#################################################################
# Canonical (symmetric) PLS
# transform the data
plsca = PLSCanonical(n_components=2)
plsca.fit(x_train, y_train)
x_train_r, y_train_r = plsca.transform(x_train, y_train)
x_test_r, y_test_r = plsca.transform(x_test, y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot x vs y scores on each components
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.plot(x_train_r[:, 0], y_train_r[:, 0], "ob", label="train")
plt.plot(x_test_r[:, 0], y_test_r[:, 0], "or", label="test")
plt.xlabel("x scores")
plt.ylabel("y scores")
plt.title('Comp. 1: x vs y (test corr = %.2f)' %
          np.corrcoef(x_test_r[:, 0], y_test_r[:, 0])[0, 1])
plt.xticks(())
plt.yticks(())