Example #1
def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps):
    """ Learns correlation matching (CM) over I_tr and T_tr
        and applies it to I_tr, T_tr, I_te, T_te
        
        
        Parameters
        ----------
        
        I_tr: np.ndarray [shape=(n_tr, d_I)]
            image data matrix for training
        
        T_tr: np.ndarray [shape=(n_tr, d_T)]
            text data matrix for training
        
        I_te: np.ndarray [shape=(n_te, d_I)]
            image data matrix for testing
        
        T_te: np.ndarray [shape=(n_te, d_T)]
            text data matrix for testing
        
        n_comps: int > 0 [scalar]
            number of canonical components to use
            
        Returns
        -------
        
        I_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            image data matrix represented in correlation space
        
        T_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            text data matrix represented in correlation space
        
        I_te_cca : np.ndarray [shape=(n_te, n_comps)]
            image data matrix represented in correlation space
        
        T_te_cca : np.ndarray [shape=(n_te, n_comps)]
            text data matrix represented in correlation space
        
        """


    # scale the image and text data
    I_scaler = StandardScaler()
    I_tr = I_scaler.fit_transform(I_tr)
    I_te = I_scaler.transform(I_te)

    T_scaler = StandardScaler()
    T_tr = T_scaler.fit_transform(T_tr)
    T_te = T_scaler.transform(T_te)

    cca = PLSCanonical(n_components=n_comps, scale=False)
    cca.fit(I_tr, T_tr)

    I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr)
    I_te_cca, T_te_cca = cca.transform(I_te, T_te)

    return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
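# A minimal usage sketch for correlation_matching above, assuming it lives in a
# module with the imports below; random arrays stand in for real image/text
# features and all shapes are hypothetical:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSCanonical

rng = np.random.RandomState(0)
I_tr, T_tr = rng.randn(100, 20), rng.randn(100, 30)
I_te, T_te = rng.randn(40, 20), rng.randn(40, 30)
I_tr_cca, T_tr_cca, I_te_cca, T_te_cca = correlation_matching(
    I_tr, T_tr, I_te, T_te, n_comps=5)
print(I_tr_cca.shape, T_te_cca.shape)  # (100, 5) (40, 5)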
Example #2
def feature_action_sensitivity(feature_type='TD4'):
    ''' For each feature, analyze its covariance with and without electrode shift '''
    results = []
    
    subjects = ['subject_' + str(i + 1) for i in range(1)]

    channel_pos_list = ['S0',                                             # center position
                        'U1', 'U2', 'D1', 'D2', 'L1', 'L2', 'R1', 'R2']  # up, down, left, right
    pos_num = len(channel_pos_list)
    
    actions = [i+1 for i in range(7)]
    action_num = len(actions)                        # 7 action types

    if feature_type == 'TD4':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL']
    elif feature_type == 'TD5':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL','RMS']
    feat_num = len(feature_list)                    # 4 feature dimensions

    groups = [i+1 for i in range(4)]
    group_num = len(groups)                         # 4 channel groups
    group_span = group_num*feat_num
    # print group_span
    action_span = feat_num*group_num                # 16
    # print groups, channel_num, channel_span, feat_num
    
    train_dir = 'train4_250_100'


    results.append(['subject', 'action', 'feature', 'group', 'means_shift', 'std_shift'] )
    plsca = PLSCanonical(n_components=2)
    # pos = 1
    k=0
    for pos_idx, pos_name in enumerate(channel_pos_list[1:]):
        pos = pos_idx+1
        for subject in subjects:
            # shift_simulation = np.ones((action_num,action_span,2))
            trains, classes = data_load.load_feature_dataset(train_dir, subject, feature_type)
            # m = trains.shape[0]
            # print trains.shape, classes.shape, m
            # print group_span, group_span*2
            # sys.exit(0)
            # m = trains.shape[0]*2/3
            m = trains.shape[0] // 2
            X_train = trains[:m, group_span*pos: group_span*(pos+1)]
            Y_train = trains[:m, :group_span]
            X_test = trains[m:, group_span*pos: group_span*(pos+1)]
            Y_test = trains[m:, :group_span]

            plsca.fit(X_train, Y_train)
            X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
            X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

            filename=subject+'_'+pos_name
            # plot_plsc_figure(X_train_r,Y_train_r,X_test_r, Y_test_r, filename)
            plot_plsc_figure_two(X_train_r,Y_train_r,X_test_r, Y_test_r, filename)
Example #3
def generate_transform_equations(trains_S0, trains_shift, **kw):
    print 'generate transform equations.........'
    new_fold(transform_fold)
    chan_len = kw['chan_len']
    for idx, channel_pos in enumerate(kw['pos_list']):
        X_trains = trains_shift[:,idx*chan_len:idx*chan_len+chan_len]
        plsca = PLSCanonical(n_components=12)
        plsca.fit(X_trains, trains_S0)
        joblib.dump(plsca, transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
    print 'generate transform equations finished.........'
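# The dumped models can later be reloaded and applied to new data. A minimal
# sketch; `transform_fold` is the module-level folder used above, the
# subject/position names and the array shape are hypothetical stand-ins:
import numpy as np
import joblib

plsca = joblib.load(transform_fold + '/cca_transform_subject_1_U1.model')
X_shift = np.random.randn(10, 12)       # hypothetical batch; columns must match the training chan_len
X_shift_cca = plsca.transform(X_shift)  # project shifted-channel features into the shared space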
Example #4
def drawFaces(emb1, emb2, wordRanking, n, reduction="cut"):
    """
    Plot Chernoff faces for n most/less interesting words
    From: https://gist.github.com/aflaxman/4043086
    :param n: if negative: less interesting
    :param reduction:
    :return:
    """
    s1 = None
    s2 = None
    if reduction=="cut":
        s1 = emb1.getSimMatrix()[0:,0:18]
        s2 = emb2.getSimMatrix()[0:,0:18]
    elif reduction=="svd":
        s1 = TruncatedSVD(n_components=18).fit_transform(emb1.getSimMatrix())  # 18 components, matching the other reduction modes
        s2 = TruncatedSVD(n_components=18).fit_transform(emb2.getSimMatrix())
    elif reduction=="cca": #use original embeddings, not similarity matrix for reduction
        cca = PLSCanonical(n_components=18)
        cca.fit(emb1.m, emb2.m)
        s1, s2 = cca.transform(emb1.m, emb2.m)
    interesting = list()
    name = str(n)+"."+reduction
    if n<0: #plot uninteresting words
        n *= -1
        interesting = [wordRanking[::-1][i] for i in xrange(n)]
    else:
        interesting = [wordRanking[i] for i in xrange(n)]
    fig = plt.figure(figsize=(11,11))
    c = 0
    for i in range(n):
        word = interesting[i]
        j = emb1.d[word]
        ax = fig.add_subplot(n,2,c+1,aspect='equal')
        mpl_cfaces.cface(ax, *s1[j]) #nice for similarity matrix *s1[j][:18]
        ax.axis([-1.2,1.2,-1.2,1.2])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(word)
        ax2 = fig.add_subplot(n,2,c+2,aspect='equal')
        mpl_cfaces.cface(ax2, *s2[j])
        ax2.axis([-1.2,1.2,-1.2,1.2])
        ax2.set_xticks([])
        ax2.set_yticks([])
        ax2.set_title(word)
        c += 2
    plotname = "plots/"+NAME+".cface_s1s2_"+name+".png"
    fig.savefig(plotname)
    print("\tSaved Chernoff faces plot in '%s'" % (plotname))
Example #5
    def getCCARanking(self, filter=None):
        """
        Compare how far apart words are in projection into common space by CCA
        :return:
        """
        cca = PLSCanonical(n_components=self.n)
        cca.fit(self.emb1.m, self.emb2.m)
        m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)

        #get distances between vectors
        assert self.emb1.vocab_size == self.emb2.vocab_size
        distDict = dict()
        for i in xrange(self.emb1.vocab_size):
            v1 = m1transformed[i]
            v2 = m2transformed[i]
            w = self.emb1.rd[i]
            distDict[w] = 1-Similarity.euclidean(v1,v2)
        ranked = sorted(distDict.iteritems(), key=itemgetter(1), reverse=True)
        if filter is not None:
            ranked = [(w, s) for (w, s) in distDict.iteritems() if w in filter]
        return ranked
Example #6
 def plotClustersCCA(self, filter=None):
     """
     Plot clusters in 2dim CCA space: Comparable across embeddings
     :return:
     """
     if len(self.cluster1) <= 1:
         cmap1 = plt.get_cmap('jet', 2)
     else:
         cmap1 = plt.get_cmap('jet', len(self.cluster1))
     cmap1.set_under('gray')
     if len(self.cluster2) <= 1:
         cmap2 = plt.get_cmap('jet', 2)
     else:
         cmap2 = plt.get_cmap('jet', len(self.cluster2))
     cmap2.set_under('gray')
     cca = PLSCanonical(n_components=2)
     cca.fit(self.emb1.m, self.emb2.m)
     m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)
     labels1 = [self.emb1.rd[i] for i in xrange(self.emb1.vocab_size)]
     colors1 = [self.word2cluster1[self.emb1.rd[i]] for i in xrange(self.emb1.vocab_size)]
     labels2 = [self.emb2.rd[i] for i in xrange(self.emb2.vocab_size)]
     colors2 = [self.word2cluster2[self.emb2.rd[i]] for i in xrange(self.emb2.vocab_size)]
     if filter is not None:
         print("\tFiltering samples to plot")
         filteredIds = [self.emb1.d[w] for w in filter] #get ids for words in filter
         m1transformed = m1transformed[filteredIds]
         m2transformed = m2transformed[filteredIds]
         labels1 = [l for l in labels1 if l in filter]
         labels2 = [l for l in labels2 if l in filter]
     elif m1transformed.shape[0] > 100: #sample indices to display, otherwise it's too messy
         filteredIds = np.random.randint(low=0, high=m1transformed.shape[0], size=100) #sample 100 row indices
         m1transformed = m1transformed[filteredIds]
         m2transformed = m2transformed[filteredIds]
         labels1 = [labels1[i] for i in filteredIds]
         labels2 = [labels2[i] for i in filteredIds]
     plotWithLabelsAndColors(m1transformed, labels1, colors=colors1, cmap=cmap1, filename="plots/"+NAME+".cca1.png", dimRed="CCA")
     plotWithLabelsAndColors(m2transformed, labels2, colors=colors2, cmap=cmap2, filename="plots/"+NAME+".cca2.png", dimRed="CCA")
Example #7
def test_pls_canonical_basics():
    # Basic checks for PLSCanonical
    d = load_linnerud()
    X = d.data
    Y = d.target

    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)
    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)

    # Check X = TP' and Y = UQ'
    T = pls._x_scores
    P = pls.x_loadings_
    U = pls._y_scores
    Q = pls.y_loadings_
    # Need to scale first
    Xc, Yc, x_mean, y_mean, x_std, y_std = _center_scale_xy(X.copy(),
                                                            Y.copy(),
                                                            scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T))
    assert_array_almost_equal(Yc, np.dot(U, Q.T))

    # Check that rotations on training data lead to scores
    Xt = pls.transform(X)
    assert_array_almost_equal(Xt, pls._x_scores)
    Xt, Yt = pls.transform(X, Y)
    assert_array_almost_equal(Xt, pls._x_scores)
    assert_array_almost_equal(Yt, pls._y_scores)

    # Check that inverse_transform works
    X_back = pls.inverse_transform(Xt)
    assert_array_almost_equal(X_back, X)
    _, Y_back = pls.inverse_transform(Xt, Yt)
    assert_array_almost_equal(Y_back, Y)
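# `assert_matrix_orthogonal` is not defined in this snippet; a sketch of the
# check it performs (M.T @ M must be diagonal, i.e. the columns are mutually
# orthogonal), modeled on the scikit-learn test helper of the same name:
import numpy as np
from numpy.testing import assert_array_almost_equal

def assert_matrix_orthogonal(M):
    K = np.dot(M.T, M)
    assert_array_almost_equal(K, np.diag(np.diag(K)))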
Example #8
class _PLSCanonicalImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)
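# `Op` is bound elsewhere in the wrapper's module (lale-style wrappers
# presumably alias the wrapped scikit-learn estimator this way). A minimal
# usage sketch, assuming Op is PLSCanonical and using random data:
import numpy as np
from sklearn.cross_decomposition import PLSCanonical as Op

impl = _PLSCanonicalImpl(n_components=2)
X, Y = np.random.randn(50, 4), np.random.randn(50, 4)
impl.fit(X, Y)
print(impl.transform(X).shape)  # (50, 2)
print(impl.predict(X).shape)    # (50, 4)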
def plot_compare_cross_decomposition():
    # Dataset based latent variables model

    n = 500
    # 2 latents vars:
    l1 = np.random.normal(size=n)
    l2 = np.random.normal(size=n)

    latents = np.array([l1, l1, l2, l2]).T
    X = latents + np.random.normal(size=4 * n).reshape((n, 4))
    Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

    X_train = X[:n // 2]
    Y_train = Y[:n // 2]
    X_test = X[n // 2:]
    Y_test = Y[n // 2:]

    print("Corr(X)")
    print(np.round(np.corrcoef(X.T), 2))
    print("Corr(Y)")
    print(np.round(np.corrcoef(Y.T), 2))

    # #############################################################################
    # Canonical (symmetric) PLS

    # Transform data
    # ~~~~~~~~~~~~~~
    plsca = PLSCanonical(n_components=2)
    plsca.fit(X_train, Y_train)
    X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
    X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

    # Scatter plot of scores
    # ~~~~~~~~~~~~~~~~~~~~~~
    # 1) On the diagonal, plot X vs Y scores for each component
    plt.figure(figsize=(12, 8))
    plt.subplot(221)
    plt.scatter(X_train_r[:, 0],
                Y_train_r[:, 0],
                label="train",
                marker="o",
                s=25)
    plt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")

    plt.subplot(224)
    plt.scatter(X_train_r[:, 1],
                Y_train_r[:, 1],
                label="train",
                marker="o",
                s=25)
    plt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 2: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")

    # 2) Off diagonal plot components 1 vs 2 for X and Y
    plt.subplot(222)
    plt.scatter(X_train_r[:, 0],
                X_train_r[:, 1],
                label="train",
                marker="*",
                s=50)
    plt.scatter(X_test_r[:, 0], X_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("X comp. 1")
    plt.ylabel("X comp. 2")
    plt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())

    plt.subplot(223)
    plt.scatter(Y_train_r[:, 0],
                Y_train_r[:, 1],
                label="train",
                marker="*",
                s=50)
    plt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("Y comp. 1")
    plt.ylabel("Y comp. 2")
    plt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)' %
              np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # #############################################################################
    # PLS regression, with multivariate response, a.k.a. PLS2

    n = 1000
    q = 3
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    B = np.array([[1, 2] + [0] * (p - 2)] * q).T
    # each Yj = 1*X1 + 2*X2 + noise
    Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

    pls2 = PLSRegression(n_components=3)
    pls2.fit(X, Y)
    print("True B (such that: Y = XB + Err)")
    print(B)
    # compare pls2.coef_ with B
    print("Estimated B")
    print(np.round(pls2.coef_, 1))
    pls2.predict(X)

    # PLS regression, with univariate response, a.k.a. PLS1

    n = 1000
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
    pls1 = PLSRegression(n_components=3)
    pls1.fit(X, y)
    # note that the number of components exceeds 1 (the dimension of y)
    print("Estimated betas")
    print(np.round(pls1.coef_, 1))

    # #############################################################################
    # CCA (PLS mode B with symmetric deflation)

    cca = CCA(n_components=2)
    cca.fit(X_train, Y_train)
    X_train_r, Y_train_r = cca.transform(X_train, Y_train)
    X_test_r, Y_test_r = cca.transform(X_test, Y_test)
Example #11
################################################################################
#
# PLS
#
################################################################################

import scipy.linalg
from sklearn.cross_decomposition import PLSCanonical

Xim_tgt_s_ = StandardScaler().fit_transform(Xim[msk_tgt, :])
Xdemoclin_tgt_s_ = StandardScaler().fit_transform(Xdemoclin[msk_tgt, :])
_, s_, _ = scipy.linalg.svd(Xdemoclin_tgt_s_, full_matrices=False)
rank_ = np.sum(s_ > 1e-6)

plsca = PLSCanonical(n_components=rank_)
%time PLSim_scores, PLSclin_scores = plsca.fit_transform(Xim_tgt_s_, Xdemoclin_tgt_s_)

# Imaging components
df_ = pd.DataFrame(PLSim_scores)
df_["respond_wk8"] = pop["respond_wk8"][msk_tgt].values
df_["respond_wk16"] = pop["respond_wk16"][msk_tgt].values
df_["GM_frac"] = pop["GM_frac"][msk_tgt].values
sns.pairplot(df_, hue="respond_wk8")
print("PC1 capture global GM atrophy")

# Demo/Clinic components
df_ = pd.DataFrame(PLSclin_scores)
for var in vars_demo + vars_clinic + ["respond_wk8", "respond_wk16"]:
    df_[var] = pop[var][msk_tgt].values
Example #12
        vec_c.append(i)
    for i in vec2:
        vec_c.append(i)

    if j < len_train:
        l_p.append(vec_p)
        l_c.append(vec_c)
    else:
        l_p_t.append(vec_p)
        l_c_t.append(vec_c)
    j += 1

sorted_p = np.asarray(l_p)
sorted_c = np.asarray(l_c)  #Convert the input to an array

plc = PLSCanonical()
plc.fit_transform(sorted_c, sorted_p)
sorted_c, sorted_p = plc.transform(sorted_c, sorted_p)

sorted_c_test = np.asarray(l_c_t)
sorted_p_test = np.asarray(l_p_t)
sorted_c_test, sorted_p_test = plc.transform(sorted_c_test, sorted_p_test)

plr = PLSRegression()
plr.fit(sorted_c, sorted_p)
params = plr.get_params()
plr.set_params(**params)
y_score = plr.predict(sorted_c_test)
sim_count = 0

print("Test Similarity: ")
Example #13
class Wrapper:
    """
    This is a wrapper class for linear, regularised and kernel CCA, Multiset CCA and Generalized CCA.
    We create an instance with a method and number of latent dimensions.
    If we have more than 2 views we need to use generalized methods, but we can override in the 2 view case also with
    the generalized parameter.

    The class has a number of methods:

    fit(): gives us train correlations and stores the variables needed for out of sample prediction as well as some
    method-specific variables

    cv_fit(): allows us to perform a hyperparameter search and then fit the model using the optimal hyperparameters

    predict_corr(): allows us to predict the out of sample correlation for supplied views

    predict_view(): allows us to predict a reconstruction of missing views from the supplied views

    transform_view(): allows us to transform given views to the latent variable space

    the remaining methods are internal helpers used by the methods above
    """

    def __init__(self, latent_dims: int = 1, method: str = 'l2', generalized: bool = False, max_iter: int = 500,
                 tol=1e-6):
        self.latent_dims = latent_dims
        self.method = method
        self.generalized = generalized
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, *args, params=None):
        if params is None:
            params = {}
        self.params = params
        if len(args) > 2:
            self.generalized = True
            print('more than 2 views therefore switched to generalized')
        if 'c' not in self.params:
            self.params = {'c': [0] * len(args)}
        if self.method == 'kernel':
            #Linear kernel by default
            if 'kernel' not in self.params:
                self.params['kernel'] = 'linear'
            #First order polynomial by default
            if 'degree' not in self.params:
                self.params['degree'] = 1
            # Unit kernel width (sigma) by default
            if 'sigma' not in self.params:
                self.params['sigma'] = 1.0

        # Fit returns in-sample score vectors and correlations as well as models with transform functionality
        self.dataset_list = []
        self.dataset_means = []
        for dataset in args:
            self.dataset_means.append(dataset.mean(axis=0))
            self.dataset_list.append(dataset - dataset.mean(axis=0))

        if self.method == 'kernel':
            self.fit_kcca = cca_zoo.KCCA.KCCA(self.dataset_list[0], self.dataset_list[1], params=self.params,
                                              latent_dims=self.latent_dims)
            self.score_list = [self.fit_kcca.U, self.fit_kcca.V]
        elif self.method == 'pls':
            self.fit_scikit_pls(self.dataset_list[0], self.dataset_list[1])
        elif self.method == 'scikit':
            self.fit_scikit_cca(self.dataset_list[0], self.dataset_list[1])
        elif self.method == 'mcca':
            self.fit_mcca(*self.dataset_list)
        elif self.method == 'gcca':
            self.fit_gcca(*self.dataset_list)
        else:
            self.outer_loop(*self.dataset_list)
            if self.method[:4] == 'tree':
                self.tree_list = [self.tree_list[i] for i in range(len(args))]
                self.weights_list = [np.expand_dims(tree.feature_importances_, axis=1) for tree in self.tree_list]
            else:
                self.rotation_list = []
                for i in range(len(args)):
                    self.rotation_list.append(
                        self.weights_list[i] @ pinv2(self.loading_list[i].T @ self.weights_list[i], check_finite=False))
        self.train_correlations = self.predict_corr(*args)
        return self

    def cv_fit(self, *args, param_candidates=None, folds: int = 5, verbose: bool = False):
        best_params = cross_validate(*args, max_iter=self.max_iter, latent_dims=self.latent_dims, method=self.method,
                                     param_candidates=param_candidates, folds=folds,
                                     verbose=verbose, tol=self.tol)
        self.fit(*args, params=best_params)
        return self

    def bayes_cv_fit(self, *args, param_candidates=None, folds: int = 5, verbose: bool = False):
        space = {
            "n_estimators": hp.choice("n_estimators", [100, 200, 300, 400, 500, 600]),
            "max_depth": hp.quniform("max_depth", 1, 15, 1),
            "criterion": hp.choice("criterion", ["gini", "entropy"]),
        }

        trials = Trials()

        best_params = fmin(
            fn=Wrapper(),
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials
            )
        self.fit(*args, params=best_params)
        return self

    def predict_corr(self, *args):
        # Takes two datasets and predicts their out of sample correlation using trained model
        transformed_views = self.transform_view(*args)
        all_corrs = []
        for x, y in itertools.product(transformed_views, repeat=2):
            all_corrs.append(np.diag(np.corrcoef(x.T, y.T)[:self.latent_dims, self.latent_dims:]))
        all_corrs = np.array(all_corrs).reshape((len(args), len(args), self.latent_dims))
        return all_corrs

    def predict_view(self, *args):
        # Regress original given views onto target
        transformed_views = self.transform_view(*args)

        # Get the regression from the training data with available views
        predicted_target = np.mean([transformed_views[i] for i in range(len(args)) if args[i] is not None], axis=0)

        predicted_views = []
        for i, view in enumerate(args):
            if view is None:
                predicted_views.append(predicted_target @ pinv2(self.weights_list[i]))
            else:
                predicted_views.append(view)
        for i, predicted_view in enumerate(predicted_views):
            predicted_views[i] += self.dataset_means[i]
        return predicted_views

    def transform_view(self, *args):
        # Demeaning
        new_views = []
        for i, new_view in enumerate(args):
            if new_view is None:
                new_views.append(None)
            else:
                new_views.append(new_view - self.dataset_means[i])

        if self.method == 'kernel':
            transformed_views = list(self.fit_kcca.transform(new_views[0], new_views[1]))
        elif self.method == 'pls':
            transformed_views = list(self.PLS.transform(new_views[0], new_views[1]))
        elif self.method[:4] == 'tree':
            transformed_views = []
            for i, new_view in enumerate(new_views):
                if new_view is None:
                    transformed_views.append(None)
                else:
                    transformed_views.append(self.tree_list[i].predict(new_view))
        else:
            transformed_views = []
            for i, new_view in enumerate(new_views):
                if new_view is None:
                    transformed_views.append(None)
                else:
                    transformed_views.append(new_view @ self.rotation_list[i])
        # d x n x k
        return transformed_views

    def outer_loop(self, *args):
        # list of d: p x k
        self.weights_list = [np.zeros((args[i].shape[1], self.latent_dims)) for i in range(len(args))]
        # list of d: n x k
        self.score_list = [np.zeros((args[i].shape[0], self.latent_dims)) for i in range(len(args))]
        # list of d:
        self.loading_list = [np.zeros((args[i].shape[1], self.latent_dims)) for i in range(len(args))]

        if len(args) == 2:
            C_train = args[0].T @ args[1]
            C_train_res = C_train.copy()
        else:
            C_train_res = None

        residuals = list(args)
        # For each of the dimensions
        for k in range(self.latent_dims):
            self.inner_loop = cca_zoo.alternating_least_squares.ALS_inner_loop(*residuals, C=C_train_res,
                                                                               generalized=self.generalized,
                                                                               params=self.params,
                                                                               method=self.method,
                                                                               max_iter=self.max_iter)
            for i in range(len(args)):
                if self.method[:4] == 'tree':
                    self.tree_list = self.inner_loop.weights
                else:
                    self.weights_list[i][:, k] = self.inner_loop.weights[i]
                    self.score_list[i][:, k] = self.inner_loop.targets[i, :]
                    self.loading_list[i][:, k] = residuals[i].T @ self.score_list[i][:, k] / np.linalg.norm(
                        self.score_list[i][:, k])
                    residuals[i] -= np.outer(self.score_list[i][:, k] / np.linalg.norm(self.score_list[i][:, k]),
                                             self.loading_list[i][:, k])
        return self

    def fit_scikit_cca(self, train_set_1, train_set_2):
        self.cca = CCA(n_components=self.latent_dims, scale=False)
        self.cca.fit(train_set_1, train_set_2)
        self.score_list = [self.cca.x_scores_, self.cca.y_scores_]
        self.weights_list = [self.cca.x_weights_, self.cca.y_weights_]
        self.loading_list = [self.cca.x_loadings_, self.cca.y_loadings_]
        self.rotation_list = [self.cca.x_rotations_, self.cca.y_rotations_]
        return self

    def fit_scikit_pls(self, train_set_1, train_set_2):
        self.PLS = PLSCanonical(n_components=self.latent_dims, scale=False)
        self.PLS.fit(train_set_1, train_set_2)
        self.score_list = [self.PLS.x_scores_, self.PLS.y_scores_]
        self.weights_list = [self.PLS.x_weights_, self.PLS.y_weights_]
        return self

    def fit_mcca(self, *args):
        all_views = np.concatenate(args, axis=1)
        C = all_views.T @ all_views
        # Can regularise by adding to diagonal
        D = block_diag(*[(1 - self.params['c'][i]) * m.T @ m + self.params['c'][i] * np.eye(m.shape[1]) for i, m in
                         enumerate(args)])
        R = cholesky(D, lower=False)
        whitened = np.linalg.inv(R.T) @ C @ np.linalg.inv(R)
        [eigvals, eigvecs] = np.linalg.eig(whitened)
        idx = np.argsort(eigvals, axis=0)[::-1]
        eigvecs = eigvecs[:, idx].real
        eigvals = eigvals[idx].real
        eigvecs = np.linalg.inv(R) @ eigvecs
        splits = np.cumsum([0] + [view.shape[1] for view in args])
        self.weights_list = [eigvecs[splits[i]:splits[i + 1], :self.latent_dims] for i in range(len(args))]
        self.rotation_list = self.weights_list
        self.score_list = [self.dataset_list[i] @ self.weights_list[i] for i in range(len(args))]

    def fit_gcca(self, *args):
        Q = []
        for i, view in enumerate(args):
            view_cov = view.T @ view
            view_cov = (1 - self.params['c'][i]) * view_cov + self.params['c'][i] * np.eye(view_cov.shape[0])
            Q.append(view @ np.linalg.inv(view_cov) @ view.T)
        Q = np.sum(Q, axis=0)
        [eigvals, eigvecs] = np.linalg.eig(Q)
        idx = np.argsort(eigvals, axis=0)[::-1]
        eigvecs = eigvecs[:, idx].real
        eigvals = eigvals[idx].real
        self.weights_list = [np.linalg.pinv(view) @ eigvecs[:, :self.latent_dims] for view in args]
        self.rotation_list = self.weights_list
        self.score_list = [self.dataset_list[i] @ self.weights_list[i] for i in range(len(args))]
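# A minimal usage sketch for the Wrapper above with two random views; 'pls'
# routes to fit_scikit_pls (PLSCanonical), and predict_corr yields an array of
# shape (n_views, n_views, latent_dims) of between-view score correlations:
import numpy as np

X = np.random.rand(100, 10)
Y = np.random.rand(100, 8)
wrapper = Wrapper(latent_dims=2, method='pls').fit(X, Y)
print(wrapper.train_correlations.shape)  # (2, 2, 2)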
Example #14
def run_pls_loop(n_seeds: int = 10, equal_dims: bool = False, **kwargs):
    default_args = {
        'sample_sizes': [int(10 ** i) for i in range(2, 5)],
        'seeds': [int(2 ** i) for i in range(n_seeds)],
        'sigmas': np.linspace(0, 5, num=11),
        'orthogonal': [False],
        'normal': [True],
        'three_d': [False, True],
        'dims_x': np.logspace(1, 4, num=4, base=4, dtype=int),
        'dims_y': np.logspace(1, 4, num=4, base=4, dtype=int),
    }
    for k in default_args:
        if k in kwargs:
            default_args[k] = list(kwargs[k]) if isinstance(kwargs[k], (list, tuple, np.ndarray)) else [kwargs[k]]

    df = pd.DataFrame()
    for n_samples in tqdm(default_args['sample_sizes']):
        for three_d in default_args['three_d']:
            train, test = generate_source_signal(n_samples=n_samples, three_d=three_d)
            for seed in tqdm(default_args['seeds'], leave=False):
                for orthogonal in default_args['orthogonal']:
                    for normal in default_args['normal']:
                        for sigma in default_args['sigmas']:
                            for dim_x in default_args['dims_x']:
                                for dim_y in [dim_x] if equal_dims else default_args['dims_y']:
                                    # create sim
                                    sim = create_pls_simulation(
                                        train=train,
                                        test=test,
                                        n_samples=n_samples,
                                        three_d=three_d,
                                        angle_spacing=1.0,
                                        magnitude_range=None,
                                        dim_x=dim_x,
                                        dim_y=dim_y,
                                        sigma=sigma,
                                        orthogonal=orthogonal,
                                        normal=normal,
                                        seed=seed,
                                    )
                                    # fit PLS
                                    pls = PLSCanonical(
                                        n_components=sim['metadata']['dim_z'],
                                        scale=True,
                                        algorithm='svd',
                                        max_iter=int(1e9),
                                        tol=1e-15,
                                    ).fit(sim['x_train'], sim['y_train'])
                                    # get results
                                    results = visualize_pls_results(pls, sim, verbose=False)
                                    results.update({
                                        'n_samples': n_samples,
                                        'three_d': three_d,
                                        'seed': seed,
                                        'orthogonal': orthogonal,
                                        'normal': normal,
                                        'sigma': sigma,
                                        'dim_x': dim_x,
                                        'dim_y': dim_y,
                                    })
                                    results = {k: [v] for k, v in results.items()}
                                    df = pd.concat([df, pd.DataFrame.from_dict(results)])
    return reset_df(df), default_args
Example #15
def pls_decomposition(videos, audios, n_components=256):
    plsca = PLSCanonical(n_components=n_components)
    plsca.fit(audios, videos)

    audios_c, videos_c = plsca.transform(audios, videos)  # same X/Y order as fit(audios, videos)
    return videos_c, audios_c
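# A minimal usage sketch for pls_decomposition, with random arrays standing in
# for real video/audio features; n_components must not exceed the smaller
# feature dimension, so 16 is used here instead of the default 256:
import numpy as np
from sklearn.cross_decomposition import PLSCanonical

videos = np.random.randn(200, 64)
audios = np.random.randn(200, 64)
videos_c, audios_c = pls_decomposition(videos, audios, n_components=16)
print(videos_c.shape, audios_c.shape)  # (200, 16) (200, 16)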
reg = linear_model.Lasso(alpha=0.1)
# Elastic Net regression
from sklearn.linear_model import ElasticNet
regr = ElasticNet(random_state=0)
# Bayesian regression (Bayesian Ridge)
from sklearn import linear_model
reg = linear_model.BayesianRidge()
# Polynomial regression (polynomial basis-function regression)
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
poly.fit_transform(X)
# Partial least squares regression (PLS)
from sklearn.cross_decomposition import PLSCanonical
PLSCanonical(algorithm='nipals',
             copy=True,
             max_iter=500,
             n_components=2,
             scale=True,
             tol=1e-06)
# Canonical correlation analysis (CCA)
from sklearn.cross_decomposition import CCA
cca = CCA(n_components=2)

# B. Clustering
# KNN / nearest neighbors
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(X)
# K-means
from sklearn.cluster import KMeans
kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
# Hierarchical clustering (supports multiple distance metrics)
from sklearn.cluster import AgglomerativeClustering
plt.plot(fpr, tpr)
plt.plot([0, 1], [0, 1])
plt.xlim([0, 1])
plt.gca().set_aspect('equal', adjustable='box')
plt.legend(['Cell volume', 'Age', 'Both'])

#NB: Strong collinearity between Age and Volume

# Transition rate prediction using PLS
X = dfc_g1[['vol_sm', 'Age', 'gr_sm']]  # Design matrix
y = dfc_g1['G1S_logistic']  # Response var
# Drop NaN rows
I = np.isnan(dfc_g1['gr_sm'])
X = X.loc[~I].copy()
y = y[~I]
pls_model = PLSCanonical()
pls_model.fit(scale(X), y)

X_c, y_c = pls_model.transform(scale(X), y)

# Multiple linear regression on birth size and growth rate
df['bvol'] = df['Birth volume']
df['exp_gr'] = df['Exponential growth rate']
df['g1_len'] = df['G1 length']
model = smf.ols('g1_len ~ exp_gr + bvol', data=df).fit()
model.summary()
print(model.pvalues)

# Delete S/G2 after first time point
g1s_marked = []
for c in collated_filtered:
Example #18
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):

    start_time = time.time()
    if (kw.has_key('log_fold')):
        log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']
    cv = 3
    results = []
    results.append([
        'Feat', 'Algorithm', 'n_components', 'Channel_Pos', 'Accuracy', 'std'
    ])
    log_file = 'feat_' + kw['feature_type'] + '_intra'

    clf = sklearn.lda.LDA(solver='svd',
                          shrinkage=None,
                          priors=None,
                          n_components=None,
                          store_covariance=False,
                          tol=0.0001)

    data_num = trains.shape[0] // action_num

    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      trains,
                                                      classes,
                                                      cv=cv)
    results.append([
        'feat_TD4_cv_' + str(cv), 'lda', 'ALL', 0,
        scores.mean(),
        scores.std()
    ])

    # Intra-position training strategy: 9 groups of data
    print 'Intra-position training.............'
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:, idx * chan_len:idx * chan_len + chan_len]

        scores = sklearn.cross_validation.cross_val_score(clf,
                                                          trains_intra,
                                                          classes,
                                                          cv=cv)
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0, channel_pos,
            scores.mean(),
            scores.std()
        ])

    # Center-position training strategy
    print 'Center-position training.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0,
            'train S0' + ' test ' + channel_pos,
            scores.mean(),
            scores.std()
        ])

    # Group training strategy (distinct from intra-position training), 5-fold
    print 'Group training.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros((itera, ))
        # stds = np.zeros( (itera,) )
        itera -= 1
        trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)

            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * (action_idx + 1)), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * (action_idx + 1)), axis=0)

            X_train = np.concatenate(
                (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]),
                axis=0)
            y_train = np.concatenate(
                (classes[train_idx_all], classes[train_idx_all]), axis=0)

            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]

            # X_test = trains_shift
            # y_test = classes

            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()

            itera -= 1
        # print scores
        results.append([
            'feat_TD4_cv_' + str(cv), 'lda', 0, 'S0 + ' + channel_pos,
            np.mean(scores),
            np.std(scores)
        ])

    # CCA-based training strategy, 5-fold cross-validation
    print 'CCA training strategy.............'
    trains_S0 = trains[:, 0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]  # subspace dimensionalities
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:

        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros((itera, ))
            stds = np.zeros((itera, ))
            itera -= 1
            trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], np.int)
                test_idx_all = np.array([], np.int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate(
                        (train_idx_all, train_idx * (action_idx + 1)), axis=0)
                    test_idx_all = np.concatenate(
                        (test_idx_all, test_idx * (action_idx + 1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(
                    trains_shift, trains_S0)
                X_trains = np.concatenate(
                    (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate((classes, classes[train_idx_all]),
                                          axis=0)
                score = clf.fit(X_trains,
                                y_trains).score(trains_shift_cca[test_idx_all],
                                                classes[test_idx_all])

                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1

            results.append([
                'feat_TD4_cv_' + str(cv), 'lda_cca', n_components,
                'S0 + ' + channel_pos,
                np.mean(scores),
                np.std(scores)
            ])

    log_result(results,
               log_fold + '/' + log_file + '_action_1-' + str(action_num), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_action_1-' + str(
        action_num)
    print '----training TD4 time elapsed:', time.time() - start_time
Example #19
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw):
    print 'training_lda_TD4_inter.........'
    start_time = time.time()

    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']

    print "----training " + kw[
        'feature_type'] + " inter, training by position O, testing by electrode shift "

    cv = 5
    results = []
    results.append(['Feat', 'Algorithm', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_' + kw['feature_type'] + '_inter'

    clf = sklearn.lda.LDA(solver='svd',
                          shrinkage=None,
                          priors=None,
                          n_components=None,
                          store_covariance=False,
                          tol=0.0001)

    data_num = trains_S0.shape[0] // action_num
    # print data_num

    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      trains_S0,
                                                      classes,
                                                      cv=cv)
    results.append(
        ['feat_TD4_cv_' + str(cv), 'lda', 'S0',
         scores.mean(),
         scores.std()])

    kf = KFold(data_num, n_folds=cv)

    for idx, channel_pos in enumerate(kw['pos_list']):

        X_test = trains_shift[:, idx * chan_len:idx * chan_len + chan_len]
        y_test = classes

        iteration = cv
        scores = np.zeros((iteration, ))
        cca_scores = np.zeros((iteration, ))

        iteration -= 1
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * action_idx), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * action_idx), axis=0)

            # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all]
            X_train, y_train = trains_S0, classes

            X_train_shift, y_train_shift = X_test[train_idx_all], classes[
                train_idx_all]

            X_train_all = np.concatenate((X_train, X_train_shift), axis=0)
            y_train_all = np.concatenate((y_train, y_train_shift), axis=0)

            # sys.exit(0)  # debug exit; left enabled it would abort the loop here
            score_inter = clf.fit(X_train_all,
                                  y_train_all).score(X_test, y_test)
            scores[iteration] = score_inter.mean()
            # print X_train.shape, y_train.shape

            if channel_pos != 'S0':

                # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
                plsca = PLSCanonical(n_components=14)
                # print X_test.shape, X_train.shape
                # sys.exit(0)
                plsca.fit(X_test[train_idx], X_train)
                X_test_cca, X_train_cca = plsca.transform(X_test, X_train)
                cca_score = clf.fit(X_train_cca,
                                    y_train).score(X_test_cca, y_test)
                cca_scores[iteration] = cca_score.mean()

            iteration -= 1

        # print scores
        # print cca_scores
        # sys.exit(0)
        results.append(
            ['feat_TD4', 'lda', channel_pos,
             np.mean(scores),
             np.std(scores)])
        results.append([
            'feat_TD4', 'lda_cca', channel_pos,
            np.mean(cca_scores),
            np.std(cca_scores)
        ])

    log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_' + channel_pos + '_' + str(
        kw['num'])
    print '----training TD4 time elapsed:', time.time() - start_time
Example #20
    connectivity_data)

if not include_negative_weights:

    # set negative connectivities to 0
    edge_data = np.apply_along_axis(
        lambda x: [0 if element < 0 else element for element in x], 1,
        edge_data)

# re-split data (3 ways) for CCA
X1_train = edge_data[:140, :]
X2_train = edge_data[140:280, :]
X2_remain = edge_data[280:, :]
#cca = CCA(n_components =2)
#cca.fit(X1_train, X2_train)
cca = PLSCanonical(n_components=100)
cca.fit(X1_train, X2_train)
block_1_transformed, block_2_transformed = cca.transform(X1_train,
                                                         X2_train,
                                                         copy=False)
block_3_transformed = np.dot(X2_remain, cca.y_rotations_)

edge_data_transformed = np.vstack(
    (block_1_transformed, block_2_transformed, block_3_transformed))
# initialise the classifier

clf = svm.SVC(kernel='precomputed')

# optional shuffle
perm = np.random.permutation(n_subjects)
#print perm
Example #21
#correct not accurate
from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical

df = pd.read_csv('newdata.csv')
x = df.drop(['tag'], axis=1)
y = df.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

plsr = PLSRegression()
plsr.fit(X_train, Y_train)

plsc = PLSCanonical()
plsc.fit(X_train, Y_train)

print(plsr.score(X_test, Y_test))
print(plsc.score(X_test, Y_test))
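# For both estimators, .score(X, Y) is the coefficient of determination (R^2)
# of the predicted Y. A self-contained sketch with random data standing in for
# newdata.csv (shapes are hypothetical):
import numpy as np
from sklearn.cross_decomposition import PLSRegression, PLSCanonical

rng = np.random.RandomState(5)
X, Y = rng.randn(120, 9), rng.randn(120, 2)
print(PLSRegression().fit(X, Y).score(X, Y))
print(PLSCanonical().fit(X, Y).score(X, Y))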
Example #22
plssvd = PLSSVD(n_components=2)
xt,yt = plssvd.fit_transform(dataTrain,Ytrain)
fig = plt.figure()
util.plotData(fig,xt,labelsTrain,classColors)

u = plssvd.x_weights_
plt.quiver(u[0,0],u[1,0],color='k',edgecolor='k',lw=1,scale=0.1,figure=fig)
plt.quiver(-u[1,0],u[0,0],color='k',edgecolor='k',lw=1,scale=0.4,figure=fig)

#%% PLS mode-A
lda = LDA()
nComponents = np.arange(1,nClasses+1)
plsCanScores = np.zeros((2,np.alen(nComponents)))
for i,n in enumerate(nComponents):
    plscan = PLSCanonical(n_components=n)
    plscan.fit(dataTrain,Ytrain)
    dataTrainT = plscan.transform(dataTrain)
    dataTestT = plscan.transform(dataTest)
    plsCanScores[:,i] = util.classify(dataTrainT,dataTestT,labelsTrain,labelsTest)
fig = plt.figure()
util.plotAccuracy(fig,nComponents,plsCanScores)
plt.title('PLS Canonical accuracy',figure=fig)

plscan = PLSCanonical(n_components=2)
xt,yt = plscan.fit_transform(dataTrain,Ytrain)
fig = plt.figure()
util.plotData(fig,xt,labelsTrain,classColors)

u = plscan.x_weights_
plt.quiver(u[0,0],u[1,0],color='k',edgecolor='k',lw=1,scale=0.1,figure=fig)
Example #23
 def __init__(self, **hyperparams):
     self._hyperparams = hyperparams
     self._wrapped_model = Op(**self._hyperparams)
Example #24
        plt.plot(nComponents, plsSvdScores[i, :], lw=3)

    plt.xlim(1, np.amax(nComponents))
    plt.title('PLS SVD accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.legend(['LR', 'LDA', 'GNB', 'Linear SVM', 'rbf SVM'],
               loc='lower right')
    plt.grid(True)

if (0):
    #%% PLS Canonical
    nComponents = np.arange(1, nClasses + 1)
    plsCanScores = np.zeros((5, np.alen(nComponents)))
    for i, n in enumerate(nComponents):
        plscan = PLSCanonical(n_components=n)
        plscan.fit(Xtrain, Ytrain)
        XtrainT = plscan.transform(Xtrain)
        XtestT = plscan.transform(Xtest)
        plsCanScores[:, i] = util.classify(XtrainT, XtestT, labelsTrain,
                                           labelsTest)

    plscan = PLSCanonical(n_components=2)
    plscan.fit(Xtrain, Ytrain)
    xt = plscan.transform(Xtrain)
    fig = plt.figure()
    util.plotData(fig, xt, labelsTrain, classColors)
    plt.title('First 2 components of projected data')

    #%% Plot accuracies for PLSSVD
    plt.figure()
Example #25
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import PLSCanonical
from sklearn.neighbors import KNeighborsClassifier
import math
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

dataSet = datasets.load_digits()
data = dataSet["data"]
target = dataSet["target"]

plsca = PLSCanonical(n_components=2)
plsca.fit(data, target)

X_train_r, Y_train_r = plsca.transform(data, target)

knn = math.sqrt(len(X_train_r))
knn = KNeighborsClassifier(n_neighbors=int(knn))

Y_train_r = [int(Y_train_r[i]) for i in range(0, len(Y_train_r))]

k = knn.fit(X_train_r, Y_train_r)
print(k.score(X_train_r, Y_train_r))
knn = KNeighborsClassifier(n_neighbors=4)

sfs = SFS(knn,
          k_features=3,
          forward=True,
          floating=False,
          verbose=2,
Example #26
 def _create_model(self):
     return PLSCanonical()
Example #27
    for i in range (5):
        plt.plot(nComponents,plsSvdScores[i,:],lw=3)

    plt.xlim(1,np.amax(nComponents))
    plt.title('PLS SVD accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.legend (['LR','LDA','GNB','Linear SVM','rbf SVM'],loc='lower right')
    plt.grid(True)

if (0):
    #%% PLS Canonical
    nComponents = np.arange(1,nClasses+1)
    plsCanScores = np.zeros((5,np.alen(nComponents)))
    for i,n in enumerate(nComponents):
        plscan = PLSCanonical(n_components=n)
        plscan.fit(Xtrain,Ytrain)
        XtrainT = plscan.transform(Xtrain)
        XtestT = plscan.transform(Xtest)
        plsCanScores[:,i] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)

    
    plscan = PLSCanonical(n_components=2)
    plscan.fit(Xtrain,Ytrain)
    xt = plscan.transform(Xtrain)
    fig = plt.figure()
    util.plotData(fig,xt,labelsTrain,classColors)
    plt.title('First 2 components of projected data')
    

    #%% Plot accuracies for PLSSVD 
Example #28
 def fit_scikit_pls(self, train_set_1, train_set_2):
     self.PLS = PLSCanonical(n_components=self.latent_dims, scale=False)
     self.PLS.fit(train_set_1, train_set_2)
     self.score_list = [self.PLS.x_scores_, self.PLS.y_scores_]
     self.weights_list = [self.PLS.x_weights_, self.PLS.y_weights_]
     return self
Example #29
 def __init__(self, allow_missing_values=False):
     # Explicitly initialise both constructors:
     super(MultiCurvePlsPredictor,
           self).__init__(classic_estimator=PLSCanonical(),
                          allow_missing_values=allow_missing_values)
Example #30
File: Zad4.py Project: justynias/AIS
# Next, evaluate the kNN classifier on the test set restricted to the selected
# subset of features. Take the parameter k to be the square root of the number
# of objects in the set. For which number of features are the results best?
from sklearn import datasets
from sklearn import model_selection
from sklearn.neighbors import KNeighborsClassifier
import math
from sklearn.cross_decomposition import PLSCanonical

mnist_dataset = datasets.load_digits()
X = mnist_dataset.data
Y = mnist_dataset.target
target_names = mnist_dataset.target_names
train, test, train_targets, test_targets = model_selection.train_test_split(X, Y, train_size=0.5,test_size=0.5)

                                                                            
max = 0
max_n_components = 0
for i in range(1, 10):
    plsca = PLSCanonical(n_components=i)
    plsca.fit(train, train_targets)
    X_r = plsca.transform(train)
    Y_r = plsca.transform(test)  # reuse the transform fitted on train; refitting on the test set would leak labels
    clf = KNeighborsClassifier(round(math.sqrt(X.shape[0])),weights="uniform", metric="euclidean")
    clf.fit(X_r, train_targets)
    print(i, ":", clf.score(Y_r, test_targets))
    if max < clf.score(Y_r, test_targets):
        max = clf.score(Y_r, test_targets)
        max_n_components = i
        
print("Best result for:", max_n_components)
Example #31
File: regression.py Project: cortu01/CODEX
    def get_algorithm(self):
        '''
        Inputs:
            algorithm (string)  - Name of the regressor to run.  Follows Sklearn naming conventions.
                                    Available keys: ARDRegression | AdaBoostRegressor | BaggingRegressor | BayesianRidge | CCA
                                                    DecisionTreeRegressor | ElasticNet | ExtraTreeRegressor
                                                    ExtraTreesRegressor | GaussianProcessRegressor | GradientBoostingRegressor
                                                    HuberRegressor | KNeighborsRegressor | KernelRidge | Lars | Lasso
                                                    LassoLars | LinearRegression | LinearSVR | MLPRegressor | NuSVR | 
                                                    OrthogonalMatchingPursuit | PLSCanonical | PLSRegression | 
                                                    PassiveAggressiveRegressor | RANSACRegressor | RandomForestRegressor | 
                                                    Ridge | SGDRegressor | SVR | TheilSenRegressor | TransformedTargetRegressor

                                    Currently not supporting: ElasticNetCV | LarsCV | LassoCV | LassoLarsCV | LassoLarsIC | 
                                                    MultiTaskElasticNet | MultiTaskElasticNetCV | MultiTaskLasso | MultiTaskLassoCV |
                                                    OrthogonalMatchingPursuitCV | RidgeCV | RadiusNeighborsRegressor
        Outputs:

        Notes:
            Scoring Metrics: https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
        '''
        if (self.algorithmName == "ARDRegression"): algorithm = ARDRegression()
        elif (self.algorithmName == "AdaBoostRegressor"):
            algorithm = AdaBoostRegressor()
        elif (self.algorithmName == "BaggingRegressor"):
            algorithm = BaggingRegressor()
        elif (self.algorithmName == "BayesianRidge"):
            algorithm = BayesianRidge()
        elif (self.algorithmName == "CCA"):
            algorithm = CCA()
        elif (self.algorithmName == "DecisionTreeRegressor"):
            algorithm = DecisionTreeRegressor()
        elif (self.algorithmName == "ElasticNet"):
            algorithm = ElasticNet()
        elif (self.algorithmName == "ExtraTreeRegressor"):
            algorithm = ExtraTreeRegressor()
        elif (self.algorithmName == "ExtraTreesRegressor"):
            algorithm = ExtraTreesRegressor()
        elif (self.algorithmName == "GaussianProcessRegressor"):
            algorithm = GaussianProcessRegressor()
        elif (self.algorithmName == "GradientBoostingRegressor"):
            algorithm = GradientBoostingRegressor()
        elif (self.algorithmName == "HuberRegressor"):
            algorithm = HuberRegressor()
        elif (self.algorithmName == "KNeighborsRegressor"):
            algorithm = KNeighborsRegressor()
        elif (self.algorithmName == "KernelRidge"):
            algorithm = KernelRidge()
        elif (self.algorithmName == "Lars"):
            algorithm = Lars()
        elif (self.algorithmName == "Lasso"):
            algorithm = Lasso()
        elif (self.algorithmName == "LassoLars"):
            algorithm = LassoLars()
        elif (self.algorithmName == "LinearRegression"):
            algorithm = LinearRegression()
        elif (self.algorithmName == "LinearSVR"):
            algorithm = LinearSVR()
        elif (self.algorithmName == "MLPRegressor"):
            algorithm = MLPRegressor()
        elif (self.algorithmName == "NuSVR"):
            algorithm = NuSVR()
        elif (self.algorithmName == "OrthogonalMatchingPursuit"):
            algorithm = OrthogonalMatchingPursuit()
        elif (self.algorithmName == "PLSCanonical"):
            algorithm = PLSCanonical()
        elif (self.algorithmName == "PLSRegression"):
            algorithm = PLSRegression()
        elif (self.algorithmName == "PassiveAggressiveRegressor"):
            algorithm = PassiveAggressiveRegressor()
        elif (self.algorithmName == "RANSACRegressor"):
            algorithm = RANSACRegressor()
        elif (self.algorithmName == "RandomForestRegressor"):
            algorithm = RandomForestRegressor()
        elif (self.algorithmName == "Ridge"):
            algorithm = Ridge()
        elif (self.algorithmName == "SGDRegressor"):
            algorithm = SGDRegressor()
        elif (self.algorithmName == "SVR"):
            algorithm = SVR()
        elif (self.algorithmName == "TheilSenRegressor"):
            algorithm = TheilSenRegressor()
        elif (self.algorithmName == "TransformedTargetRegressor"):
            algorithm = TransformedTargetRegressor()
        else:
            return None

        return algorithm
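The chain above is straightforward but repetitive; the same dispatch can be written as a lookup table that keeps the supported names and their constructors in one place. A minimal sketch, assuming the same imports as above (the _REGRESSORS name is illustrative, not part of CODEX):

_REGRESSORS = {
    "ARDRegression": ARDRegression,
    "AdaBoostRegressor": AdaBoostRegressor,
    "BaggingRegressor": BaggingRegressor,
    # ... one entry per name listed in the docstring ...
    "TransformedTargetRegressor": TransformedTargetRegressor,
}

def get_algorithm(self):
    # look the class up by name and instantiate it; None signals an unknown name
    cls = _REGRESSORS.get(self.algorithmName)
    return cls() if cls is not None else None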
Example #32
ursis = []
graph_features = []

# label_col = df1.loc['URSI']
# print('label col:')
# print(label_col)

ursi_ids = df1.iloc[:, 0]

# note: the 'normalize' keyword was removed in scikit-learn 1.2; with newer
# releases, standardize features in a Pipeline with StandardScaler instead
linreg = LinearRegression(normalize=True)
lasso = Lasso(fit_intercept=True, normalize=True)
ransac = RANSACRegressor()

pls = PLSRegression()
cca = CCA()
pls_ca = PLSCanonical()

rf = RandomForestRegressor(n_estimators=50, n_jobs=4)
gp = GaussianProcessRegressor()
ir = IsotonicRegression()

svr_lin = SVR(kernel='linear')
svr_rbf = SVR()

classifiers = [linreg, lasso, pls, svr_lin, svr_rbf, rf, gp]
classifier_names = ['LR', 'Lasso', 'PLS', 'SVR (lin)', 'SVR (rbf)', 'RF', 'GP']

prediction_targets = ['all', 'CCI']

r = {}
mse = {}
Example #33
# correct but not accurate
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical

df = pd.read_csv('newdata.csv')
x = df.drop(['tag'], axis=1)
y = df.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

plsr = PLSRegression()
plsr.fit(X_train, Y_train)

plsc = PLSCanonical()
plsc.fit(X_train, Y_train)

# score() reports the R^2 of the predicted Y block for both estimators
print(plsr.score(X_test, Y_test))
print(plsc.score(X_test, Y_test))
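If the point of the comparison is to pick the better-scoring variant, the number of components can also be tuned by cross-validation rather than compared at the defaults. A hedged sketch reusing the split above (the candidate grid is illustrative):

from sklearn.model_selection import GridSearchCV

# GridSearchCV scores each candidate with the estimator's own R^2 score
grid = GridSearchCV(PLSRegression(), {'n_components': [1, 2, 3, 4, 5]}, cv=5)
grid.fit(X_train, Y_train)
print(grid.best_params_, grid.score(X_test, Y_test))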
Example #34
def GetAllModelsForComparison(X_train, Y_train):
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ABCMeta': ABCMeta(),
        #'ClassifierChain': ClassifierChain(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'IsotonicRegression': IsotonicRegression(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor()
    }
    return models
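A dictionary like this is usually consumed by a loop that fits and scores each entry. A minimal sketch, assuming X_train and Y_train hold a regression dataset; note that several entries above (mixins, scalers, and loss objects such as Hinge and Log) are not fitable models, so a real loop has to skip entries that fail:

from sklearn.model_selection import cross_val_score

for name, model in GetAllModelsForComparison(X_train, Y_train).items():
    try:
        scores = cross_val_score(model, X_train, Y_train, cv=5)
        print(name, scores.mean())
    except Exception as exc:  # non-estimator entries land here
        print(name, 'skipped:', exc)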
Example #35
def test_sanity_check_pls_canonical_random():
    # Sanity check for PLSCanonical on random data
    # The results were checked against the R-package plspm
    n = 500
    p_noise = 10
    q_noise = 5
    # 2 latent variables:
    rng = check_random_state(11)
    l1 = rng.normal(size=n)
    l2 = rng.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    X = latents + rng.normal(size=4 * n).reshape((n, 4))
    Y = latents + rng.normal(size=4 * n).reshape((n, 4))
    X = np.concatenate((X, rng.normal(size=p_noise * n).reshape(n, p_noise)),
                       axis=1)
    Y = np.concatenate((Y, rng.normal(size=q_noise * n).reshape(n, q_noise)),
                       axis=1)

    pls = PLSCanonical(n_components=3)
    pls.fit(X, Y)

    expected_x_weights = np.array([
        [0.65803719, 0.19197924, 0.21769083],
        [0.7009113, 0.13303969, -0.15376699],
        [0.13528197, -0.68636408, 0.13856546],
        [0.16854574, -0.66788088, -0.12485304],
        [-0.03232333, -0.04189855, 0.40690153],
        [0.1148816, -0.09643158, 0.1613305],
        [0.04792138, -0.02384992, 0.17175319],
        [-0.06781, -0.01666137, -0.18556747],
        [-0.00266945, -0.00160224, 0.11893098],
        [-0.00849528, -0.07706095, 0.1570547],
        [-0.00949471, -0.02964127, 0.34657036],
        [-0.03572177, 0.0945091, 0.3414855],
        [0.05584937, -0.02028961, -0.57682568],
        [0.05744254, -0.01482333, -0.17431274],
    ])

    expected_x_loadings = np.array([
        [0.65649254, 0.1847647, 0.15270699],
        [0.67554234, 0.15237508, -0.09182247],
        [0.19219925, -0.67750975, 0.08673128],
        [0.2133631, -0.67034809, -0.08835483],
        [-0.03178912, -0.06668336, 0.43395268],
        [0.15684588, -0.13350241, 0.20578984],
        [0.03337736, -0.03807306, 0.09871553],
        [-0.06199844, 0.01559854, -0.1881785],
        [0.00406146, -0.00587025, 0.16413253],
        [-0.00374239, -0.05848466, 0.19140336],
        [0.00139214, -0.01033161, 0.32239136],
        [-0.05292828, 0.0953533, 0.31916881],
        [0.04031924, -0.01961045, -0.65174036],
        [0.06172484, -0.06597366, -0.1244497],
    ])

    expected_y_weights = np.array([
        [0.66101097, 0.18672553, 0.22826092],
        [0.69347861, 0.18463471, -0.23995597],
        [0.14462724, -0.66504085, 0.17082434],
        [0.22247955, -0.6932605, -0.09832993],
        [0.07035859, 0.00714283, 0.67810124],
        [0.07765351, -0.0105204, -0.44108074],
        [-0.00917056, 0.04322147, 0.10062478],
        [-0.01909512, 0.06182718, 0.28830475],
        [0.01756709, 0.04797666, 0.32225745],
    ])

    expected_y_loadings = np.array([
        [0.68568625, 0.1674376, 0.0969508],
        [0.68782064, 0.20375837, -0.1164448],
        [0.11712173, -0.68046903, 0.12001505],
        [0.17860457, -0.6798319, -0.05089681],
        [0.06265739, -0.0277703, 0.74729584],
        [0.0914178, 0.00403751, -0.5135078],
        [-0.02196918, -0.01377169, 0.09564505],
        [-0.03288952, 0.09039729, 0.31858973],
        [0.04287624, 0.05254676, 0.27836841],
    ])

    assert_array_almost_equal(np.abs(pls.x_loadings_),
                              np.abs(expected_x_loadings))
    assert_array_almost_equal(np.abs(pls.x_weights_),
                              np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_loadings_),
                              np.abs(expected_y_loadings))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_weights))

    x_loadings_sign_flip = np.sign(pls.x_loadings_ / expected_x_loadings)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    y_loadings_sign_flip = np.sign(pls.y_loadings_ / expected_y_loadings)
    assert_array_almost_equal(x_loadings_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_loadings_sign_flip, y_weights_sign_flip)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)

    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)
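The np.abs comparisons and sign-flip assertions above are needed because PLS weights and loadings are only identified up to a per-component sign. A minimal sketch of the same alignment idea (the helper name is illustrative):

import numpy as np

def align_signs(actual, expected):
    # flip each column of `actual` to point the same way as `expected`
    signs = np.sign(np.sum(actual * expected, axis=0))
    signs[signs == 0] = 1
    return actual * signs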
Example #36
              tol=tol,
              shuffle=True,
              verbose=0,
              epsilon=0.1,
              random_state=random_state,
              learning_rate='invscaling',
              eta0=0.01,
              power_t=0.25,
              early_stopping=False,
              validation_fraction=0.1,
              n_iter_no_change=5,
              warm_start=False,
              average=False),
 PLSCanonical(n_components=9,
              scale=False,
              algorithm='nipals',
              max_iter=1000,
              tol=1e-3,
              copy=True),
 CCA(n_components=9, scale=False, max_iter=1000, tol=1e-3, copy=True),
 MLPRegressor(hidden_layer_sizes=(500, 30),
              activation='relu',
              solver='lbfgs',
              alpha=0.0001,
              batch_size='auto',
              learning_rate='constant',
              learning_rate_init=0.00000001,
              power_t=0.5,
              max_iter=100000,
              shuffle=True,
              random_state=random_state,
              tol=tol,
Example #37
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):

    start_time = time.time()
    # 'log_fold' is required here, as in training_lda_TD4_inter below
    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num = kw['action_num']
    cv = 3
    results = []
    results.append(
        ['Feat', 'Algorithm','n_components', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_'+kw['feature_type']+'_intra'

    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)

    data_num = trains.shape[0]/action_num

    scores = sklearn.cross_validation.cross_val_score(clf, trains, classes, cv=cv)
    results.append(['feat_TD4_cv_'+str(cv), 'lda', 'ALL', 0, scores.mean(), scores.std()])
    
    # Intra-group training strategy: 9 groups of data
    print 'Intra-group training.............'
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:,idx*chan_len: idx*chan_len+chan_len]

        scores = sklearn.cross_validation.cross_val_score(
            clf, trains_intra, classes, cv=cv)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, channel_pos, scores.mean(), scores.std()])


    # Center-position training strategy
    print 'Center-position training strategy.............'
    trains_intra_S0 = trains[:,0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, 'train S0' + ' test ' + channel_pos, scores.mean(), scores.std()])
    
    # Group training strategy (distinct from intra-group training), KFold cross-validated
    print 'Group training strategy.............'
    trains_intra_S0 = trains[:,0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros( (itera,) )
        # stds = np.zeros( (itera,) )
        itera -= 1
        trains_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)

            for action_idx in range(action_num):
                # offset the fold indices into each action's block of samples
                train_idx_all = np.concatenate( (train_idx_all, train_idx + action_idx*data_num), axis=0)
                test_idx_all = np.concatenate( (test_idx_all, test_idx + action_idx*data_num), axis=0)

            X_train = np.concatenate( (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]), axis=0)
            y_train = np.concatenate( (classes[train_idx_all], classes[train_idx_all]), axis=0)

            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]

            # X_test = trains_shift
            # y_test = classes

            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()
            
            itera -= 1
        # print scores
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, 'S0 + '+channel_pos, np.mean(scores), np.std(scores)])
    
    # CCA-based training strategy with KFold cross-validation
    print 'CCA training strategy.............'
    trains_S0 = trains[:,0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]              # candidate subspace dimensionalities
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:
        
        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros( (itera,) )
            stds = np.zeros( (itera,) )
            itera -= 1
            trains_shift = trains[:,idx*chan_len: idx*chan_len+chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], np.int)
                test_idx_all = np.array([], np.int)
                for action_idx in range(action_num):
                    # offset the fold indices into each action's block of samples
                    train_idx_all = np.concatenate( (train_idx_all, train_idx + action_idx*data_num), axis=0)
                    test_idx_all = np.concatenate( (test_idx_all, test_idx + action_idx*data_num), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(trains_shift, trains_S0)
                X_trains = np.concatenate( (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate( (classes, classes[train_idx_all]), axis=0)
                score = clf.fit(X_trains, y_trains).score(trains_shift_cca[test_idx_all], classes[test_idx_all])

                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1
            
            results.append(['feat_TD4_cv_'+str(cv), 'lda_cca', n_components, 'S0 + '+channel_pos, np.mean(scores), np.std(scores)])
    
    log_result(results, log_fold + '/' + log_file + '_action_1-'+str(action_num), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_action_1-'+str(action_num)
    print '----training TD4 time elapsed:', time.time() - start_time
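The CCA branch above is the heart of this example: learn a shared subspace between the center-position (S0) features and the shifted-electrode features with PLSCanonical, then classify in that subspace. A condensed, hedged sketch with illustrative variable names and the current LDA import path:

from sklearn.cross_decomposition import PLSCanonical
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

plsca = PLSCanonical(n_components=8)          # subspace size, cf. n_components_list
plsca.fit(X_shift, X_center)                  # row-paired shifted / center features
X_shift_c, X_center_c = plsca.transform(X_shift, X_center)
clf = LinearDiscriminantAnalysis().fit(X_center_c, y)
print(clf.score(X_shift_c, y))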
Example #38
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw):
    print 'training_lda_TD4_inter.........'
    start_time = time.time()

    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)

    chan_len = kw['chan_len']
    action_num  = kw['action_num']

    print "----training "+kw['feature_type']+" inter, training by position O, testing by electrode shift "

    cv = 5
    results = []
    results.append(['Feat', 'Algorithm','Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_'+kw['feature_type']+'_inter'

    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)

    data_num = trains_S0.shape[0]/action_num
    # print data_num
    
    scores = sklearn.cross_validation.cross_val_score(
        clf, trains_S0, classes, cv=cv)
    results.append(['feat_TD4_cv_'+str(cv), 'lda', 'S0',
                    scores.mean(), scores.std()])
    
    kf = KFold(data_num, n_folds=cv)
    
    for idx, channel_pos in enumerate(kw['pos_list']):

        X_test = trains_shift[:,idx*chan_len:idx*chan_len+chan_len]
        y_test = classes

        iteration = cv
        scores = np.zeros((iteration,))
        cca_scores = np.zeros((iteration,))
        
        
        iteration -= 1
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            for action_idx in range(action_num):
                # offset the fold indices into each action's block of samples
                train_idx_all = np.concatenate( (train_idx_all, train_idx + action_idx*data_num), axis=0)
                test_idx_all = np.concatenate( (test_idx_all, test_idx + action_idx*data_num), axis=0)

            # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all]
            X_train, y_train = trains_S0, classes

            X_train_shift, y_train_shift = X_test[train_idx_all], classes[train_idx_all]
            
            X_train_all = np.concatenate( (X_train, X_train_shift), axis=0)
            y_train_all = np.concatenate( (y_train, y_train_shift), axis=0)
            
            score_inter = clf.fit(X_train_all, y_train_all).score(X_test, y_test)
            scores[iteration] = score_inter.mean()
            # print X_train.shape, y_train.shape
            

            if channel_pos != 'S0':

                # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
                plsca = PLSCanonical(n_components=14)
                # print X_test.shape, X_train.shape
                # sys.exit(0)
                plsca.fit(X_test[train_idx_all], X_train[train_idx_all])
                X_test_cca, X_train_cca = plsca.transform(X_test, X_train)
                cca_score = clf.fit(X_train_cca, y_train).score(X_test_cca, y_test)
                cca_scores[iteration] = cca_score.mean()

            iteration -= 1

        # print scores
        # print cca_scores
        # sys.exit(0)
        results.append(['feat_TD4', 'lda', 
                        channel_pos, np.mean(scores), np.std(scores)])
        results.append(['feat_TD4', 'lda_cca', 
                        channel_pos, np.mean(cca_scores), np.std(cca_scores)])

            
    
    
    log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_' + channel_pos + '_' + str(kw['num'])
    print '----training TD4 time elapsed:', time.time() - start_time

    

    # mean_shift = 0
    # std_shift = 0
    # for i in range(2, 10):
    #     mean_shift += results[i][4]
    #     std_shift += results[i][5]
    # mean_shift /= 9
    # std_shift /= 9

    # results.append(['feat_TD4','lda(svd;tol=0.0001)', 'Shift_means', '1.0', mean_shift, std_shift])

    # mean_all = 0
    # std_all = 0
    # for i in range(1, 10):
    #     mean_all += results[i][4]
    #     std_all += results[i][5]
    # mean_all /= 9
    # std_all /= 9
Example #39
        temp4 = []
        for e in temp1[:]:
            temp4.append(e)
        for e in temp2[:]:
            temp4.append(e)
        if len(temp4) == 600 and len(temp3) == 300:
            x_n.append(temp4)
            y_n.append(temp3)

npx = np.asarray(x, dtype=np.float64)
npy = np.asarray(y, dtype=np.float64)

npxn = np.asarray(x_n, dtype=np.float64)
npyn = np.asarray(y_n, dtype=np.float64)
cca = PLSCanonical(n_components=2)
cca.fit(npx, npy)
npx, npy = cca.transform(npx, npy)
npxn, npyn = cca.transform(npxn, npyn)

pls.fit(npx, npy)
params = pls.get_params(deep=True)
print(params)
pls.set_params(**params)

y_score = pls.predict(npxn)

sim_count = 0
tol = 0.1

for index in range(len(y_score)):
Example #40
X_train = X[:n // 2]
Y_train = Y[:n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]

print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

# #############################################################################
# Canonical (symmetric) PLS

# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On the diagonal, plot X vs Y scores on each component
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.scatter(X_train_r[:, 0],
            Y_train_r[:, 0],
            label="train",
            marker="o",
            c="b",
            s=25)
Example #41
    linear_model.LassoLarsCV(),
    linear_model.LassoLarsIC(),
    linear_model.LinearRegression(),
    LinearSVR(),
    #linear_model.LogisticRegression(),
    #linear_model.LogisticRegressionCV(),
    MLPRegressor(),
    #linear_model.ModifiedHuber(),
    #linear_model.MultiTaskElasticNet(),
    #linear_model.MultiTaskElasticNetCV(),
    #linear_model.MultiTaskLasso(),
    #linear_model.MultiTaskLassoCV(),
    NuSVR(),
    linear_model.OrthogonalMatchingPursuit(),
    linear_model.OrthogonalMatchingPursuitCV(),
    PLSCanonical(),
    PLSRegression(),
    linear_model.PassiveAggressiveRegressor(),
    linear_model.RANSACRegressor(),
    RadiusNeighborsRegressor(),
    RandomForestRegressor(),
    #linear_model.RandomizedLasso(),
    #linear_model.RandomizedLogisticRegression(),
    linear_model.Ridge(),
    linear_model.RidgeCV(),
    linear_model.SGDRegressor(),
    SVR(),
    linear_model.TheilSenRegressor()
]