def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps): """ Learns correlation matching (CM) over I_tr and T_tr and applies it to I_tr, T_tr, I_te, T_te Parameters ---------- I_tr: np.ndarray [shape=(n_tr, d_I)] image data matrix for training T_tr: np.ndarray [shape=(n_tr, d_T)] text data matrix for training I_te: np.ndarray [shape=(n_te, d_I)] image data matrix for testing T_te: np.ndarray [shape=(n_te, d_T)] text data matrix for testing n_comps: int > 0 [scalar] number of canonical componens to use Returns ------- I_tr_cca : np.ndarray [shape=(n_tr, n_comps)] image data matrix represetned in correlation space T_tr_cca : np.ndarray [shape=(n_tr, n_comps)] text data matrix represetned in correlation space I_te_cca : np.ndarray [shape=(n_te, n_comps)] image data matrix represetned in correlation space T_te_cca : np.ndarray [shape=(n_te, n_comps)] text data matrix represetned in correlation space """ # sclale image and text data I_scaler = StandardScaler() I_tr = I_scaler.fit_transform(I_tr) I_te = I_scaler.transform(I_te) T_scaler = StandardScaler() T_tr = T_scaler.fit_transform(T_tr) T_te = T_scaler.transform(T_te) cca = PLSCanonical(n_components=n_comps, scale=False) cca.fit(I_tr, T_tr) I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr) I_te_cca, T_te_cca = cca.transform(I_te, T_te) return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
def feature_action_sensitivity(feature_type='TD4'):
    '''For each feature, analyse its covariance with and without electrode
    shift, fitting a 2-component PLS-canonical model per shifted position and
    plotting the paired scores.  (Comments translated from Chinese.)'''
    results = []
    subjects = ['subject_' + str(i + 1) for i in range(1)]
    channel_pos_list = ['S0',  # centre position
                        'U1', 'U2', 'D1', 'D2', 'L1', 'L2', 'R1', 'R2']  # up / down / left / right offsets
    pos_num = len(channel_pos_list)
    actions = [i+1 for i in range(7)]
    action_num = len(actions)  # 7: number of action classes
    if feature_type == 'TD4':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL']
    elif feature_type == 'TD5':
        feature_list = ['MAV', 'ZC', 'SSC', 'WL', 'RMS']
    feat_num = len(feature_list)  # 4: feature dimension per channel
    groups = [i+1 for i in range(4)]
    group_num = len(groups)  # 4: number of channel groups
    group_span = group_num*feat_num
    # print group_span
    action_span = feat_num*group_num  # 16
    # print groups, channel_num, channel_span, feat_num
    train_dir = 'train4_250_100'
    results.append(['subject', 'action', 'feature', 'group', 'means_shift', 'std_shift'])
    plsca = PLSCanonical(n_components=2)
    # pos = 1
    k = 0
    # Skip 'S0': only shifted positions are compared against the centre block.
    for pos_idx, pos_name in enumerate(channel_pos_list[1:]):
        pos = pos_idx+1
        for subject in subjects:
            # shift_simulation = np.ones((action_num,action_span,2))
            trains, classes = data_load.load_feature_dataset(train_dir, subject, feature_type)
            # m = trains.shape[0]
            # print trains.shape, classes.shape, m
            # print group_span, group_span*2
            # sys.exit(0)
            # m = trains.shape[0]*2/3
            # Python 2 integer division: first half is train, second half test.
            m = trains.shape[0]/2
            X_train = trains[:m, group_span*pos: group_span*(pos+1)]
            # NOTE(review): stray second ':' in ':m:' is harmless but likely a typo.
            Y_train = trains[:m:, :group_span]
            X_test = trains[m:, group_span*pos: group_span*(pos+1)]
            Y_test = trains[m:, :group_span]
            plsca.fit(X_train, Y_train)
            X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
            X_test_r, Y_test_r = plsca.transform(X_test, Y_test)
            filename = subject+'_'+pos_name
            # plot_plsc_figure(X_train_r,Y_train_r,X_test_r, Y_test_r, filename)
            plot_plsc_figure_two(X_train_r, Y_train_r, X_test_r, Y_test_r, filename)
def generate_transform_equations(trains_S0, trains_shift, **kw): print 'generate transform equations.........' new_fold(transform_fold) chan_len = kw['chan_len'] for idx, channel_pos in enumerate(kw['pos_list']): X_trains = trains_shift[:,idx*chan_len:idx*chan_len+chan_len] plsca = PLSCanonical(n_components=12) plsca.fit(X_trains, trains_S0) joblib.dump(plsca, transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model') print 'generate transform equations finished.........'
def drawFaces(emb1, emb2, wordRanking, n, reduction="cut"):
    """ Plot Chernoff faces for n most/less interesting words
    From: https://gist.github.com/aflaxman/4043086
    :param n: if negative: less interesting
    :param reduction: how to reduce each embedding to 18 face parameters
                      ("cut", "svd" or "cca")
    :return:
    """
    s1 = None
    s2 = None
    if reduction == "cut":
        # Keep only the first 18 columns of each similarity matrix.
        s1 = emb1.getSimMatrix()[0:, 0:18]
        s2 = emb2.getSimMatrix()[0:, 0:18]
    elif reduction == "svd":
        # NOTE(review): `k` is not defined in this function — presumably a
        # module-level constant; confirm, otherwise this branch raises NameError.
        s1 = TruncatedSVD(n_components=k).fit_transform(emb1.getSimMatrix())
        s2 = TruncatedSVD(n_components=k).fit_transform(emb2.getSimMatrix())
    elif reduction == "cca":
        # use orginal embeddings, not similarity matrix for reduction
        cca = PLSCanonical(n_components=18)
        cca.fit(emb1.m, emb2.m)
        s1, s2 = cca.transform(emb1.m, emb2.m)
    interesting = list()
    name = str(n) + "." + reduction
    if n < 0:  # plot uninteresting words: take from the reversed ranking
        n *= -1
        interesting = [wordRanking[::-1][i] for i in xrange(n)]
    else:
        interesting = [wordRanking[i] for i in xrange(n)]
    fig = plt.figure(figsize=(11, 11))
    c = 0
    # One row per word, with the emb1 face on the left and emb2 on the right.
    for i in range(n):
        word = interesting[i]
        j = emb1.d[word]
        ax = fig.add_subplot(n, 2, c+1, aspect='equal')
        mpl_cfaces.cface(ax, *s1[j])  # nice for similarity matrix *s1[j][:18]
        ax.axis([-1.2, 1.2, -1.2, 1.2])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(word)
        ax2 = fig.add_subplot(n, 2, c+2, aspect='equal')
        mpl_cfaces.cface(ax2, *s2[j])
        ax2.axis([-1.2, 1.2, -1.2, 1.2])
        ax2.set_xticks([])
        ax2.set_yticks([])
        ax2.set_title(word)
        c += 2
    plotname = "plots/"+NAME+".cface_s1s2_"+name+".png"
    fig.savefig(plotname)
    print("\tSaved Chernoff faces plot in '%s'" % (plotname))
def getCCARanking(self, filter=None):
    """
    Compare how far apart words are in projection into common space by CCA.

    :param filter: optional collection of words; when given, only these words
                   are kept in the returned ranking
    :return: list of (word, score) pairs sorted by score, descending
    """
    cca = PLSCanonical(n_components=self.n)
    cca.fit(self.emb1.m, self.emb2.m)
    m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)
    # get distances between vectors
    assert self.emb1.vocab_size == self.emb2.vocab_size
    distDict = dict()
    for i in xrange(self.emb1.vocab_size):
        v1 = m1transformed[i]
        v2 = m2transformed[i]
        w = self.emb1.rd[i]
        distDict[w] = 1 - Similarity.euclidean(v1, v2)
    ranked = sorted(distDict.iteritems(), key=itemgetter(1), reverse=True)
    if filter is not None:
        # BUG FIX: previously rebuilt the result from the *unsorted* dict
        # iterator, discarding the ranking order computed above; filter the
        # already-sorted list instead.
        ranked = [(w, s) for (w, s) in ranked if w in filter]
    return ranked
def plotClustersCCA(self, filter=None):
    """
    Plot clusters in 2dim CCA space: Comparable across embeddings.

    :param filter: optional collection of words to restrict the plot to;
                   when None and there are more than 100 points, a random
                   sample of 100 points is plotted instead
    :return:
    """
    # Colormaps sized to the number of clusters (minimum 2 bins).
    if len(self.cluster1) <= 1:
        cmap1 = plt.get_cmap('jet', 2)
    else:
        cmap1 = plt.get_cmap('jet', len(self.cluster1))
    cmap1.set_under('gray')
    if len(self.cluster2) <= 1:
        cmap2 = plt.get_cmap('jet', 2)
    else:
        cmap2 = plt.get_cmap('jet', len(self.cluster2))
    cmap2.set_under('gray')
    cca = PLSCanonical(n_components=2)
    cca.fit(self.emb1.m, self.emb2.m)
    m1transformed, m2transformed = cca.transform(self.emb1.m, self.emb2.m)
    labels1 = [self.emb1.rd[i] for i in xrange(self.emb1.vocab_size)]
    colors1 = [self.word2cluster1[self.emb1.rd[i]] for i in xrange(self.emb1.vocab_size)]
    labels2 = [self.emb2.rd[i] for i in xrange(self.emb2.vocab_size)]
    colors2 = [self.word2cluster2[self.emb2.rd[i]] for i in xrange(self.emb2.vocab_size)]
    if filter is not None:
        print("\tFiltering samples to plot")
        filteredIds = [self.emb1.d[w] for w in filter]  # get ids for words in filter
        m1transformed = m1transformed[filteredIds]
        m2transformed = m2transformed[filteredIds]
        labels1 = [l for l in labels1 if l in filter]
        labels2 = [l for l in labels2 if l in filter]
    elif m1transformed.shape[0] > 100:
        # sample indices to display, otherwise it's too messy
        # BUG FIX: was 'm1.transformed.shape[0]' (AttributeError on the
        # undefined name 'm1'), drew a single scalar instead of a sample, and
        # filtered labels against 'filter' which is None in this branch.
        # Draw 100 random indices and subset labels by those indices.
        filteredIds = np.random.randint(low=0, high=m1transformed.shape[0], size=100)
        m1transformed = m1transformed[filteredIds]
        m2transformed = m2transformed[filteredIds]
        labels1 = [labels1[i] for i in filteredIds]
        labels2 = [labels2[i] for i in filteredIds]
    # NOTE(review): colors1/colors2 are not subset alongside the samples, so
    # their lengths may disagree with the plotted points — confirm against
    # plotWithLabelsAndColors before relying on per-point colors here.
    plotWithLabelsAndColors(m1transformed, labels1, colors=colors1, cmap=cmap1,
                            filename="plots/"+NAME+".cca1.png", dimRed="CCA")
    plotWithLabelsAndColors(m2transformed, labels2, colors=colors2, cmap=cmap2,
                            filename="plots/"+NAME+".cca2.png", dimRed="CCA")
def test_pls_canonical_basics():
    # Basic checks for PLSCanonical: weight/score orthogonality, the
    # loadings decomposition, transform/rotation consistency and round-trip
    # through inverse_transform.  Uses private attributes (_x_scores,
    # _y_scores, _center_scale_xy) of the fitted estimator.
    d = load_linnerud()
    X = d.data
    Y = d.target
    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)
    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)

    # Check X = TP' and Y = UQ'
    T = pls._x_scores
    P = pls.x_loadings_
    U = pls._y_scores
    Q = pls.y_loadings_
    # Need to scale first
    Xc, Yc, x_mean, y_mean, x_std, y_std = _center_scale_xy(X.copy(), Y.copy(),
                                                            scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T))
    assert_array_almost_equal(Yc, np.dot(U, Q.T))

    # Check that rotations on training data lead to scores
    Xt = pls.transform(X)
    assert_array_almost_equal(Xt, pls._x_scores)
    Xt, Yt = pls.transform(X, Y)
    assert_array_almost_equal(Xt, pls._x_scores)
    assert_array_almost_equal(Yt, pls._y_scores)

    # Check that inverse_transform works
    X_back = pls.inverse_transform(Xt)
    assert_array_almost_equal(X_back, X)
    _, Y_back = pls.inverse_transform(Xt, Yt)
    assert_array_almost_equal(Y_back, Y)
class _PLSCanonicalImpl: def __init__(self, **hyperparams): self._hyperparams = hyperparams self._wrapped_model = Op(**self._hyperparams) def fit(self, X, y=None): if y is not None: self._wrapped_model.fit(X, y) else: self._wrapped_model.fit(X) return self def transform(self, X): return self._wrapped_model.transform(X) def predict(self, X): return self._wrapped_model.predict(X)
def plot_compare_cross_decomposition():
    """Compare PLSCanonical, PLSRegression (PLS2/PLS1) and CCA on synthetic
    latent-variable data, printing correlations/coefficients and plotting the
    canonical scores (scikit-learn gallery-style example)."""
    # Dataset based latent variables model
    n = 500
    # 2 latents vars:
    l1 = np.random.normal(size=n)
    l2 = np.random.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    # Both views share the latents plus independent Gaussian noise.
    X = latents + np.random.normal(size=4 * n).reshape((n, 4))
    Y = latents + np.random.normal(size=4 * n).reshape((n, 4))
    X_train = X[:n // 2]
    Y_train = Y[:n // 2]
    X_test = X[n // 2:]
    Y_test = Y[n // 2:]
    print("Corr(X)")
    print(np.round(np.corrcoef(X.T), 2))
    print("Corr(Y)")
    print(np.round(np.corrcoef(Y.T), 2))

    # #########################################################################
    # Canonical (symmetric) PLS
    # Transform data
    # ~~~~~~~~~~~~~~
    plsca = PLSCanonical(n_components=2)
    plsca.fit(X_train, Y_train)
    X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
    X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

    # Scatter plot of scores
    # ~~~~~~~~~~~~~~~~~~~~~~
    # 1) On diagonal plot X vs Y scores on each components
    plt.figure(figsize=(12, 8))
    plt.subplot(221)
    plt.scatter(X_train_r[:, 0], Y_train_r[:, 0], label="train", marker="o", s=25)
    plt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")
    plt.subplot(224)
    plt.scatter(X_train_r[:, 1], Y_train_r[:, 1], label="train", marker="o", s=25)
    plt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label="test", marker="o", s=25)
    plt.xlabel("x scores")
    plt.ylabel("y scores")
    plt.title('Comp. 2: X vs Y (test corr = %.2f)' %
              np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])
    plt.xticks(())
    plt.yticks(())
    plt.legend(loc="best")
    # 2) Off diagonal plot components 1 vs 2 for X and Y
    plt.subplot(222)
    plt.scatter(X_train_r[:, 0], X_train_r[:, 1], label="train", marker="*", s=50)
    plt.scatter(X_test_r[:, 0], X_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("X comp. 1")
    plt.ylabel("X comp. 2")
    plt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)'
              % np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.subplot(223)
    plt.scatter(Y_train_r[:, 0], Y_train_r[:, 1], label="train", marker="*", s=50)
    plt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label="test", marker="*", s=50)
    plt.xlabel("Y comp. 1")
    plt.ylabel("Y comp. 2")
    plt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)'
              % np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
    plt.legend(loc="best")
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # #########################################################################
    # PLS regression, with multivariate response, a.k.a. PLS2
    n = 1000
    q = 3
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    B = np.array([[1, 2] + [0] * (p - 2)] * q).T
    # each Yj = 1*X1 + 2*X2 + noize
    Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5
    pls2 = PLSRegression(n_components=3)
    pls2.fit(X, Y)
    print("True B (such that: Y = XB + Err)")
    print(B)
    # compare pls2.coef_ with B
    print("Estimated B")
    print(np.round(pls2.coef_, 1))
    pls2.predict(X)

    # PLS regression, with univariate response, a.k.a. PLS1
    n = 1000
    p = 10
    X = np.random.normal(size=n * p).reshape((n, p))
    y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
    pls1 = PLSRegression(n_components=3)
    pls1.fit(X, y)
    # note that the number of components exceeds 1 (the dimension of y)
    print("Estimated betas")
    print(np.round(pls1.coef_, 1))

    # #########################################################################
    # CCA (PLS mode B with symmetric deflation)
    cca = CCA(n_components=2)
    cca.fit(X_train, Y_train)
    X_train_r, Y_train_r = cca.transform(X_train, Y_train)
    X_test_r, Y_test_r = cca.transform(X_test, Y_test)
# NOTE(review): top-level script chunk re-running the canonical-PLS demo;
# it relies on X, Y and n being defined earlier in the file.
X_train = X[:n // 2]
Y_train = Y[:n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]
print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
print("Corr(Y)")
print(np.round(np.corrcoef(Y.T), 2))

# #############################################################################
# Canonical (symmetric) PLS
# Transform data
# ~~~~~~~~~~~~~~
plsca = PLSCanonical(n_components=2)
plsca.fit(X_train, Y_train)
X_train_r, Y_train_r = plsca.transform(X_train, Y_train)
X_test_r, Y_test_r = plsca.transform(X_test, Y_test)

# Scatter plot of scores
# ~~~~~~~~~~~~~~~~~~~~~~
# 1) On diagonal plot X vs Y scores on each components
plt.figure(figsize=(12, 8))
plt.subplot(221)
plt.plot(X_train_r[:, 0], Y_train_r[:, 0], "ob", label="train")
plt.plot(X_test_r[:, 0], Y_test_r[:, 0], "or", label="test")
plt.xlabel("x scores")
plt.ylabel("y scores")
plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
          np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])
################################################################################ # # PLS # ################################################################################ import scipy.linalg from sklearn.cross_decomposition import PLSCanonical Xim_tgt_s_ = StandardScaler().fit_transform(Xim[msk_tgt, :]) Xdemoclin_tgt_s_ = StandardScaler().fit_transform(Xdemoclin[msk_tgt, :]) _, s_, _ = scipy.linalg.svd(Xdemoclin_tgt_s_, full_matrices=False) rank_ = np.sum(s_ > 1e-6) plsca = PLSCanonical(n_components=rank_) %time PLSim_scores, PLSclin_scores = plsca.fit_transform(Xim_tgt_s_, Xdemoclin_tgt_s_) # Imaging components df_ = pd.DataFrame(PLSim_scores) df_["respond_wk8"] = pop["respond_wk8"][msk_tgt].values df_["respond_wk16"] = pop["respond_wk16"][msk_tgt].values df_["GM_frac"] = pop["GM_frac"][msk_tgt].values sns.pairplot(df_, hue="respond_wk8") print("PC1 capture global GM atrophy") # Demo/Clinic components df_ = pd.DataFrame(PLSclin_scores) for var in vars_demo + vars_clinic + ["respond_wk8", "respond_wk16"]: df_[var] = pop[var][msk_tgt].values
vec_c.append(i) for i in vec2: vec_c.append(i) if j < len_train: l_p.append(vec_p) l_c.append(vec_c) else: l_p_t.append(vec_p) l_c_t.append(vec_c) j += 1 sorted_p = np.asarray(l_p) sorted_c = np.asarray(l_c) #Convert the input to an array plc = PLSCanonical() plc.fit_transform(sorted_c, sorted_p) sorted_c, sorted_p = plc.transform(sorted_c, sorted_p) sorted_c_test = np.asarray(l_c_t) sorted_p_test = np.asarray(l_p_t) sorted_c_test, sorted_p_test = plc.transform(sorted_c_test, sorted_p_test) plr = PLSRegression() plr.fit(sorted_c, sorted_p) params = plr.get_params() plr.set_params(**params) y_score = plr.predict(sorted_c_test) sim_count = 0 print("Test Similarity: ")
class Wrapper:
    """
    This is a wrapper class for linear, regularised and kernel CCA, Multiset
    CCA and Generalized CCA. We create an instance with a method and number of
    latent dimensions. If we have more than 2 views we need to use generalized
    methods, but we can override in the 2 view case also with the generalized
    parameter.

    The class has a number of methods:

    fit(): gives us train correlations and stores the variables needed for out
    of sample prediction as well as some method-specific variables
    cv_fit(): allows us to perform a hyperparameter search and then fit the
    model using the optimal hyperparameters
    predict_corr(): allows us to predict the out of sample correlation for
    supplied views
    predict_view(): allows us to predict a reconstruction of missing views
    from the supplied views
    transform_view(): allows us to transform given views to the latent
    variable space
    remaining methods are used to
    """

    def __init__(self, latent_dims: int = 1, method: str = 'l2',
                 generalized: bool = False, max_iter: int = 500, tol=1e-6):
        self.latent_dims = latent_dims
        self.method = method
        self.generalized = generalized
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, *args, params=None):
        """Centre each view, dispatch to the method-specific fitting routine
        and store in-sample train correlations on the instance."""
        if params is None:
            params = {}
        self.params = params
        if len(args) > 2:
            self.generalized = True
            print('more than 2 views therefore switched to generalized')
        # Default regularisation: no penalty for any view.
        if 'c' not in self.params:
            self.params = {'c': [0] * len(args)}
        if self.method == 'kernel':
            # Linear kernel by default
            if 'kernel' not in self.params:
                self.params['kernel'] = 'linear'
            # First order polynomial by default
            if 'degree' not in self.params:
                self.params['degree'] = 1
            # First order polynomial by default
            if 'sigma' not in self.params:
                self.params['sigma'] = 1.0
        # Fit returns in-sample score vectors and correlations as well as
        # models with transform functionality
        self.dataset_list = []
        self.dataset_means = []
        for dataset in args:
            self.dataset_means.append(dataset.mean(axis=0))
            self.dataset_list.append(dataset - dataset.mean(axis=0))
        if self.method == 'kernel':
            self.fit_kcca = cca_zoo.KCCA.KCCA(self.dataset_list[0], self.dataset_list[1],
                                              params=self.params, latent_dims=self.latent_dims)
            self.score_list = [self.fit_kcca.U, self.fit_kcca.V]
        elif self.method == 'pls':
            self.fit_scikit_pls(self.dataset_list[0], self.dataset_list[1])
        elif self.method == 'scikit':
            self.fit_scikit_cca(self.dataset_list[0], self.dataset_list[1])
        elif self.method == 'mcca':
            self.fit_mcca(*self.dataset_list)
        elif self.method == 'gcca':
            self.fit_gcca(*self.dataset_list)
        else:
            self.outer_loop(*self.dataset_list)
            # NOTE(review): the flattened source makes the nesting of the
            # following block ambiguous; it is placed inside this 'else'
            # because only outer_loop() populates loading_list — confirm.
            if self.method[:4] == 'tree':
                self.tree_list = [self.tree_list[i] for i in range(len(args))]
                self.weights_list = [np.expand_dims(tree.feature_importances_, axis=1)
                                     for tree in self.tree_list]
            else:
                self.rotation_list = []
                for i in range(len(args)):
                    self.rotation_list.append(
                        self.weights_list[i] @ pinv2(self.loading_list[i].T @ self.weights_list[i],
                                                     check_finite=False))
        self.train_correlations = self.predict_corr(*args)
        return self

    def cv_fit(self, *args, param_candidates=None, folds: int = 5, verbose: bool = False):
        """Grid-search hyperparameters via cross_validate(), then refit."""
        best_params = cross_validate(*args, max_iter=self.max_iter, latent_dims=self.latent_dims,
                                     method=self.method, param_candidates=param_candidates,
                                     folds=folds, verbose=verbose, tol=self.tol)
        self.fit(*args, params=best_params)
        return self

    def bayes_cv_fit(self, *args, param_candidates=None, folds: int = 5, verbose: bool = False):
        """Hyperopt-based search over a fixed tree-parameter space, then refit.

        NOTE(review): 'fn=Wrapper()' passes a Wrapper instance (not a loss
        callable) to hyperopt's fmin — confirm this is intentional.
        """
        space = {
            "n_estimators": hp.choice("n_estimators", [100, 200, 300, 400, 500, 600]),
            "max_depth": hp.quniform("max_depth", 1, 15, 1),
            "criterion": hp.choice("criterion", ["gini", "entropy"]),
        }
        trials = Trials()
        best_params = fmin(
            fn=Wrapper(),
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials
        )
        self.fit(*args, params=best_params)
        return self

    def predict_corr(self, *args):
        # Takes two datasets and predicts their out of sample correlation
        # using trained model
        transformed_views = self.transform_view(*args)
        all_corrs = []
        for x, y in itertools.product(transformed_views, repeat=2):
            all_corrs.append(np.diag(np.corrcoef(x.T, y.T)[:self.latent_dims, self.latent_dims:]))
        all_corrs = np.array(all_corrs).reshape((len(args), len(args), self.latent_dims))
        return all_corrs

    def predict_view(self, *args):
        # Regress original given views onto target
        transformed_views = self.transform_view(*args)
        # Get the regression from the training data with available views
        predicted_target = np.mean([transformed_views[i] for i in range(len(args))
                                    if args[i] is not None], axis=0)
        predicted_views = []
        for i, view in enumerate(args):
            if view is None:
                # Reconstruct the missing view from the averaged latent target.
                predicted_views.append(predicted_target @ pinv2(self.weights_list[i]))
            else:
                predicted_views.append(view)
        # Re-add the per-view training means removed in fit().
        for i, predicted_view in enumerate(predicted_views):
            predicted_views[i] += self.dataset_means[i]
        return predicted_views

    def transform_view(self, *args):
        # Demeaning
        new_views = []
        for i, new_view in enumerate(args):
            if new_view is None:
                new_views.append(None)
            else:
                new_views.append(new_view - self.dataset_means[i])
        if self.method == 'kernel':
            transformed_views = list(self.fit_kcca.transform(new_views[0], new_views[1]))
        elif self.method == 'pls':
            transformed_views = list(self.PLS.transform(new_views[0], new_views[1]))
        elif self.method[:4] == 'tree':
            transformed_views = []
            for i, new_view in enumerate(new_views):
                if new_view is None:
                    transformed_views.append(None)
                else:
                    transformed_views.append(self.tree_list[i].predict(new_view))
        else:
            transformed_views = []
            for i, new_view in enumerate(new_views):
                if new_view is None:
                    transformed_views.append(None)
                else:
                    transformed_views.append(new_view @ self.rotation_list[i])
        # d x n x k
        return transformed_views

    def outer_loop(self, *args):
        """Deflation-based fitting: one ALS inner loop per latent dimension."""
        # list of d: p x k
        self.weights_list = [np.zeros((args[i].shape[1], self.latent_dims)) for i in range(len(args))]
        # list of d: n x k
        self.score_list = [np.zeros((args[i].shape[0], self.latent_dims)) for i in range(len(args))]
        # list of d:
        self.loading_list = [np.zeros((args[i].shape[1], self.latent_dims)) for i in range(len(args))]
        if len(args) == 2:
            C_train = args[0].T @ args[1]
            C_train_res = C_train.copy()
        else:
            C_train_res = None
        residuals = list(args)
        # For each of the dimensions
        for k in range(self.latent_dims):
            self.inner_loop = cca_zoo.alternating_least_squares.ALS_inner_loop(
                *residuals, C=C_train_res, generalized=self.generalized,
                params=self.params, method=self.method, max_iter=self.max_iter)
            for i in range(len(args)):
                if self.method[:4] == 'tree':
                    self.tree_list = self.inner_loop.weights
                else:
                    self.weights_list[i][:, k] = self.inner_loop.weights[i]
                    self.score_list[i][:, k] = self.inner_loop.targets[i, :]
                    self.loading_list[i][:, k] = residuals[i].T @ self.score_list[i][:, k] / \
                        np.linalg.norm(self.score_list[i][:, k])
                    # Deflate: remove this component from the residual view.
                    residuals[i] -= np.outer(
                        self.score_list[i][:, k] / np.linalg.norm(self.score_list[i][:, k]),
                        self.loading_list[i][:, k])
        return self

    def fit_scikit_cca(self, train_set_1, train_set_2):
        """Two-view CCA via scikit-learn's CCA estimator."""
        self.cca = CCA(n_components=self.latent_dims, scale=False)
        self.cca.fit(train_set_1, train_set_2)
        self.score_list = [self.cca.x_scores_, self.cca.y_scores_]
        self.weights_list = [self.cca.x_weights_, self.cca.y_weights_]
        self.loading_list = [self.cca.x_loadings_, self.cca.y_loadings_]
        self.rotation_list = [self.cca.x_rotations_, self.cca.y_rotations_]
        return self

    def fit_scikit_pls(self, train_set_1, train_set_2):
        """Two-view PLS via scikit-learn's PLSCanonical estimator."""
        self.PLS = PLSCanonical(n_components=self.latent_dims, scale=False)
        self.PLS.fit(train_set_1, train_set_2)
        self.score_list = [self.PLS.x_scores_, self.PLS.y_scores_]
        self.weights_list = [self.PLS.x_weights_, self.PLS.y_weights_]
        return self

    def fit_mcca(self, *args):
        """Multiset CCA via a whitened generalized eigenvalue problem."""
        all_views = np.concatenate(args, axis=1)
        C = all_views.T @ all_views
        # Can regularise by adding to diagonal
        D = block_diag(*[(1 - self.params['c'][i]) * m.T @ m + self.params['c'][i] * np.eye(m.shape[1])
                         for i, m in enumerate(args)])
        R = cholesky(D, lower=False)
        whitened = np.linalg.inv(R.T) @ C @ np.linalg.inv(R)
        [eigvals, eigvecs] = np.linalg.eig(whitened)
        # Keep the leading latent_dims eigenvectors (descending eigenvalue).
        idx = np.argsort(eigvals, axis=0)[::-1]
        eigvecs = eigvecs[:, idx].real
        eigvals = eigvals[idx].real
        eigvecs = np.linalg.inv(R) @ eigvecs
        splits = np.cumsum([0] + [view.shape[1] for view in args])
        self.weights_list = [eigvecs[splits[i]:splits[i + 1], :self.latent_dims]
                             for i in range(len(args))]
        self.rotation_list = self.weights_list
        self.score_list = [self.dataset_list[i] @ self.weights_list[i] for i in range(len(args))]

    def fit_gcca(self, *args):
        """Generalized CCA via the eigendecomposition of summed projectors."""
        Q = []
        for i, view in enumerate(args):
            view_cov = view.T @ view
            view_cov = (1 - self.params['c'][i]) * view_cov + \
                self.params['c'][i] * np.eye(view_cov.shape[0])
            Q.append(view @ np.linalg.inv(view_cov) @ view.T)
        Q = np.sum(Q, axis=0)
        [eigvals, eigvecs] = np.linalg.eig(Q)
        idx = np.argsort(eigvals, axis=0)[::-1]
        eigvecs = eigvecs[:, idx].real
        eigvals = eigvals[idx].real
        self.weights_list = [np.linalg.pinv(view) @ eigvecs[:, :self.latent_dims] for view in args]
        self.rotation_list = self.weights_list
        self.score_list = [self.dataset_list[i] @ self.weights_list[i] for i in range(len(args))]
def run_pls_loop(n_seeds: int = 10, equal_dims: bool = False, **kwargs):
    """Sweep PLS-canonical simulations over a grid of settings.

    Builds a default grid (sample sizes, seeds, noise sigmas, dimensionality
    of each view, etc.), overridable per-key via **kwargs, fits a PLSCanonical
    model per configuration and accumulates the per-run results into a
    DataFrame.

    :param n_seeds: number of random seeds (powers of two) in the default grid
    :param equal_dims: when True, dim_y is forced equal to dim_x
    :return: (results DataFrame passed through reset_df, the grid actually used)
    """
    default_args = {
        'sample_sizes': [int(10 ** i) for i in range(2, 5)],
        'seeds': [int(2 ** i) for i in range(n_seeds)],
        'sigmas': np.linspace(0, 5, num=11),
        'orthogonal': [False],
        'normal': [True],
        'three_d': [False, True],
        'dims_x': np.logspace(1, 4, num=4, base=4, dtype=int),
        'dims_y': np.logspace(1, 4, num=4, base=4, dtype=int),
    }
    # Any keyword override replaces the default grid; scalar overrides are
    # wrapped into single-element lists.
    for k in default_args:
        if k in kwargs:
            default_args[k] = list(kwargs[k]) if isinstance(kwargs[k], (list, tuple, np.ndarray)) else [kwargs[k]]
    df = pd.DataFrame()
    for n_samples in tqdm(default_args['sample_sizes']):
        for three_d in default_args['three_d']:
            # Source signal is shared by every configuration at this size.
            train, test = generate_source_signal(n_samples=n_samples, three_d=three_d)
            for seed in tqdm(default_args['seeds'], leave=False):
                for orthogonal in default_args['orthogonal']:
                    for normal in default_args['normal']:
                        for sigma in default_args['sigmas']:
                            for dim_x in default_args['dims_x']:
                                for dim_y in [dim_x] if equal_dims else default_args['dims_y']:
                                    # create sim
                                    sim = create_pls_simulation(
                                        train=train,
                                        test=test,
                                        n_samples=n_samples,
                                        three_d=three_d,
                                        angle_spacing=1.0,
                                        magnitude_range=None,
                                        dim_x=dim_x,
                                        dim_y=dim_y,
                                        sigma=sigma,
                                        orthogonal=orthogonal,
                                        normal=normal,
                                        seed=seed,
                                    )
                                    # fit PLS (tight tol / huge max_iter so the
                                    # SVD solver runs to numerical convergence)
                                    pls = PLSCanonical(
                                        n_components=sim['metadata']['dim_z'],
                                        scale=True,
                                        algorithm='svd',
                                        max_iter=int(1e9),
                                        tol=1e-15,
                                    ).fit(sim['x_train'], sim['y_train'])
                                    # get results
                                    results = visualize_pls_results(pls, sim, verbose=False)
                                    results.update({
                                        'n_samples': n_samples,
                                        'three_d': three_d,
                                        'seed': seed,
                                        'orthogonal': orthogonal,
                                        'normal': normal,
                                        'sigma': sigma,
                                        'dim_x': dim_x,
                                        'dim_y': dim_y,
                                    })
                                    # Wrap values in lists so each run becomes
                                    # a one-row DataFrame.
                                    results = {k: [v] for k, v in results.items()}
                                    df = pd.concat([df, pd.DataFrame.from_dict(results)])
    return reset_df(df), default_args
def pls_decomposition(videos, audios, n_components=256):
    """Project paired video/audio feature matrices into a shared
    PLS-canonical space.

    :param videos: video feature matrix (one row per paired sample)
    :param audios: audio feature matrix (same number of rows as ``videos``)
    :param n_components: number of canonical components to keep
    :return: (videos_c, audios_c) — the transformed video and audio scores
    """
    plsca = PLSCanonical(n_components=n_components)
    plsca.fit(audios, videos)
    # BUG FIX: the model was fit with X=audios, Y=videos, but transform was
    # called as transform(videos, audios), swapping the X/Y roles between fit
    # and transform.  Keep the argument order consistent with fit, then return
    # in the original (videos, audios) order.
    audios_c, videos_c = plsca.transform(audios, videos)
    return videos_c, audios_c
reg = linear_model.Lasso(alpha=0.1) #弹性网络回归(Elastic Net) from sklearn.linear_model import ElasticNet regr = ElasticNet(random_state=0) #贝叶斯回归(Bayesian Regression) from sklearn import linear_model reg = linear_model.BayesianRidge() #多项式回归(Polynomial regression——多项式基函数回归) from sklearn.preprocessing import PolynomialFeatures poly = PolynomialFeatures(degree=2) poly.fit_transform(X) #偏最小二乘回归(PLS) from sklearn.cross_decomposition import PLSCanonical PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2, scale=True, tol=1e-06) #典型相关分析(CCA) from sklearn.cross_decomposition import CCA cca = CCA(n_components=2) #B聚类分析 #KNN算法 from sklearn.neighbors import KNeighborsClassifier nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(X) #Kmeans算法 from sklearn.cluster import KMeans kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10) #层次聚类(Hierarchical clustering)——支持多种距离 from sklearn.cluster import AgglomerativeClustering
plt.plot(fpr, tpr) plt.plot([0, 1], [0, 1]) plt.xlim([0, 1]) plt.gca().set_aspect('equal', adjustable='box') plt.legend(['Cell volume', 'Age', 'Both']) #NB: Strong colinearity between Age and Volume # Transition rate prediction using PLS X = dfc_g1[['vol_sm', 'Age', 'gr_sm']] # Design matrix y = dfc_g1['G1S_logistic'] # Response var # Drop NaN rows I = np.isnan(dfc_g1['gr_sm']) X = X.loc[~I].copy() y = y[~I] pls_model = PLSCanonical() pls_model.fit(scale(X), y) X_c, y_c = pls_model.transform(scale(X), y) # Multiple linearregression on birth size and growth rate df['bvol'] = df['Birth volume'] df['exp_gr'] = df['Exponential growth rate'] df['g1_len'] = df['G1 length'] model = smf.ols('g1_len ~ exp_gr + bvol', data=df).fit() model.summary() print model.pvalues # Delete S/G2 after first time point g1s_marked = [] for c in collated_filtered:
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):
    """LDA training/evaluation over TD4 features under several strategies:
    intra-group CV, centre-position (S0) training, combined S0+shift training,
    and a CCA (PLSCanonical) aligned strategy.  Results are appended to a
    table and written via log_result().  (Comments translated from Chinese;
    runtime print strings left as-is.)
    """
    start_time = time.time()
    if (kw.has_key('log_fold')):
        log_fold = root_path + '/result/' + kw['log_fold']
        new_fold(log_fold)
    chan_len = kw['chan_len']
    action_num = kw['action_num']
    cv = 3
    results = []
    results.append(['Feat', 'Algorithm', 'n_components', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_' + kw['feature_type'] + '_intra'
    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False, tol=0.0001)
    # Python 2 integer division: samples per action.
    data_num = trains.shape[0] / action_num
    scores = sklearn.cross_validation.cross_val_score(clf, trains, classes, cv=cv)
    results.append(['feat_TD4_cv_' + str(cv), 'lda', 'ALL', 0, scores.mean(), scores.std()])

    # Intra-group training strategy: 9 groups of data (one per electrode position).
    print '组内训练.............'
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:, idx * chan_len:idx * chan_len + chan_len]
        scores = sklearn.cross_validation.cross_val_score(clf, trains_intra, classes, cv=cv)
        results.append(['feat_TD4_cv_' + str(cv), 'lda', 0, channel_pos, scores.mean(), scores.std()])

    # Centre-position training strategy: train on S0, test on each shifted position.
    print '中心训练策略.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        tests_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append(['feat_TD4_cv_' + str(cv), 'lda', 0,
                        'train S0' + ' test ' + channel_pos, scores.mean(), scores.std()])

    # Group training strategy (different from the intra-group strategy), k-fold.
    print '组训练策略.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0':
            continue
        itera = cv
        scores = np.zeros((itera, ))
        # stds = np.zeros( (itera,) )
        itera -= 1
        trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            for action_idx in range(action_num):
                # NOTE(review): 'train_idx * (action_idx + 1)' scales fold
                # indices element-wise; an additive per-action offset
                # (train_idx + data_num*action_idx) looks intended — confirm
                # against the data layout.
                train_idx_all = np.concatenate((train_idx_all, train_idx * (action_idx + 1)), axis=0)
                test_idx_all = np.concatenate((test_idx_all, test_idx * (action_idx + 1)), axis=0)
            # Train on S0 plus the shifted block; test on the shifted block only.
            X_train = np.concatenate((trains_intra_S0[train_idx_all], trains_shift[train_idx_all]), axis=0)
            y_train = np.concatenate((classes[train_idx_all], classes[train_idx_all]), axis=0)
            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]
            # X_test = trains_shift
            # y_test = classes
            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()
            itera -= 1
        # print scores
        results.append(['feat_TD4_cv_' + str(cv), 'lda', 0, 'S0 + ' + channel_pos,
                        np.mean(scores), np.std(scores)])

    # CCA-based training strategy with k-fold cross validation.
    print 'CCA训练策略.............'
    trains_S0 = trains[:, 0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]  # subspace dimensionalities to try
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:
        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0':
                continue
            itera = cv
            scores = np.zeros((itera, ))
            stds = np.zeros((itera, ))
            itera -= 1
            trains_shift = trains[:, idx * chan_len:idx * chan_len + chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], np.int)
                test_idx_all = np.array([], np.int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate((train_idx_all, train_idx * (action_idx + 1)), axis=0)
                    test_idx_all = np.concatenate((test_idx_all, test_idx * (action_idx + 1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                # Align the shifted block with S0 in a shared canonical space.
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(trains_shift, trains_S0)
                X_trains = np.concatenate((trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate((classes, classes[train_idx_all]), axis=0)
                score = clf.fit(X_trains, y_trains).score(trains_shift_cca[test_idx_all],
                                                          classes[test_idx_all])
                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1
            results.append(['feat_TD4_cv_' + str(cv), 'lda_cca', n_components,
                            'S0 + ' + channel_pos, np.mean(scores), np.std(scores)])

    log_result(results, log_fold + '/' + log_file + '_action_1-' + str(action_num), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_action_1-' + str(action_num)
    print '----training TD4 time elapsed:', time.time() - start_time
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw):
    """Inter-position evaluation: train LDA on centre-position (S0) data plus
    shifted-electrode data and score on each shifted electrode position,
    with an optional CCA/PLS alignment of the shifted features.

    Required **kw keys (as read below): 'log_fold', 'chan_len', 'action_num',
    'feature_type', 'pos_list', 'num'.
    Results rows are collected in `results` and written via log_result().
    """
    print 'training_lda_TD4_inter.........'
    start_time = time.time()
    # Per-run output folder under the global result directory.
    log_fold = root_path + '/result/' + kw['log_fold']
    new_fold(log_fold)
    chan_len = kw['chan_len']      # feature columns per electrode position
    action_num = kw['action_num']  # number of action/gesture classes
    print "----training " + kw['feature_type'] + " inter, training by position O, testing by electrode shift "
    cv = 5
    results = []
    results.append(['Feat', 'Algorithm', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_' + kw['feature_type'] + '_inter'
    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)
    # Samples per action; Python 2 '/' on ints is floor division.
    data_num = trains_S0.shape[0] / action_num
    # print data_num
    # Baseline: cross-validated LDA on the centre-position data only.
    scores = sklearn.cross_validation.cross_val_score(clf, trains_S0, classes,
                                                      cv=cv)
    results.append(
        ['feat_TD4_cv_' + str(cv), 'lda', 'S0', scores.mean(), scores.std()])
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        # Feature columns belonging to this shifted electrode position.
        X_test = trains_shift[:, idx * chan_len:idx * chan_len + chan_len]
        y_test = classes
        iteration = cv
        scores = np.zeros((iteration, ))
        cca_scores = np.zeros((iteration, ))
        iteration -= 1  # fold results are stored from the highest index down
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            # NOTE(review): multiplying fold indices by action_idx (0, 1, ...)
            # does not offset them into per-action blocks -- action_idx == 0
            # contributes all-zero indices. Looks wrong; confirm intent.
            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx * action_idx), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx * action_idx), axis=0)
            # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all]
            X_train, y_train = trains_S0, classes
            X_train_shift, y_train_shift = X_test[train_idx_all], classes[
                train_idx_all]
            X_train_all = np.concatenate((X_train, X_train_shift), axis=0)
            y_train_all = np.concatenate((y_train, y_train_shift), axis=0)
            # NOTE(review): unconditional sys.exit(0) terminates the whole
            # process on the first fold -- everything below in this loop is
            # unreachable. Almost certainly a leftover debugging statement
            # (a commented-out twin appears further down).
            sys.exit(0)
            score_inter = clf.fit(X_train_all, y_train_all).score(X_test,
                                                                  y_test)
            scores[iteration] = score_inter.mean()
            # print X_train.shape, y_train.shape
            if channel_pos != 'S0':
                # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model')
                plsca = PLSCanonical(n_components=14)
                # print X_test.shape, X_train.shape
                # sys.exit(0)
                # Align shifted-position features with the S0 feature space.
                # NOTE(review): X_test[train_idx] and X_train have different
                # sample counts here -- verify this fit call if re-enabled.
                plsca.fit(X_test[train_idx], X_train)
                X_test_cca, X_train_cca = plsca.transform(X_test, X_train)
                cca_score = clf.fit(X_train_cca, y_train).score(X_test_cca,
                                                                y_test)
                cca_scores[iteration] = cca_score.mean()
            iteration -= 1
        # print scores
        # print cca_scores
        # sys.exit(0)
        results.append(
            ['feat_TD4', 'lda', channel_pos, np.mean(scores), np.std(scores)])
        results.append([
            'feat_TD4', 'lda_cca', channel_pos,
            np.mean(cca_scores),
            np.std(cca_scores)
        ])
    log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_' + channel_pos + '_' + str(
        kw['num'])
    print '----training TD4 time elapsed:', time.time() - start_time
connectivity_data) if not include_negative_weights: # set negative connectivities to 0 edge_data = np.apply_along_axis( lambda x: [0 if element < 0 else element for element in x], 1, edge_data) # re-split data (3 ways) for CCA X1_train = edge_data[:140, :] X2_train = edge_data[140:280, :] X2_remain = edge_data[280:, :] #cca = CCA(n_components =2) #cca.fit(X1_train, X2_train) cca = PLSCanonical(n_components=100) cca.fit(X1_train, X2_train) block_1_transformed, block_2_transformed = cca.transform(X1_train, X2_train, copy=False) block_3_transformed = np.dot(X2_remain, cca.y_rotations_) edge_data_transformed = np.vstack( (block_1_transformed, block_2_transformed, block_3_transformed)) # initialise the classifier clf = svm.SVC(kernel='precomputed') # optional shuffle perm = np.random.permutation(n_subjects) #print perm
# correct not accurate
from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical

# Load the data set; predictors are every column except 'tag', responses are
# every column except the kinematic/weight measurements.
frame = pd.read_csv('newdata.csv')
predictors = frame.drop(['tag'], axis=1)
responses = frame.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'],
                       axis=1)

# Reproducible split (default proportions).
X_train, X_test, Y_train, Y_test = train_test_split(predictors, responses,
                                                    random_state=5)

# Fit both PLS variants on the training split and report each model's score
# on the held-out split: regression first, then canonical PLS.
plsr = PLSRegression()
plsc = PLSCanonical()
plsr.fit(X_train, Y_train)
plsc.fit(X_train, Y_train)
print(plsr.score(X_test, Y_test))
print(plsc.score(X_test, Y_test))
plssvd = PLSSVD(n_components=2) xt,yt = plssvd.fit_transform(dataTrain,Ytrain) fig = plt.figure() util.plotData(fig,xt,labelsTrain,classColors) u = plssvd.x_weights_ plt.quiver(u[0,0],u[1,0],color='k',edgecolor='k',lw=1,scale=0.1,figure=fig) plt.quiver(-u[1,0],u[0,0],color='k',edgecolor='k',lw=1,scale=0.4,figure=fig) #%% PLS mode-A lda = LDA() nComponents = np.arange(1,nClasses+1) plsCanScores = np.zeros((2,np.alen(nComponents))) for i,n in enumerate(nComponents): plscan = PLSCanonical(n_components=n) plscan.fit(dataTrain,Ytrain) dataTrainT = plscan.transform(dataTrain) dataTestT = plscan.transform(dataTest) plsCanScores[:,i] = util.classify(dataTrainT,dataTestT,labelsTrain,labelsTest) fig = plt.figure() util.plotAccuracy(fig,nComponents,plsCanScores) plt.title('PLS Canonical accuracy',figure=fig) plscan = PLSCanonical(n_components=2) xt,yt = plscan.fit_transform(dataTrain,Ytrain) fig = plt.figure() util.plotData(fig,xt,labelsTrain,classColors) u = plscan.x_weights_ plt.quiver(u[0,0],u[1,0],color='k',edgecolor='k',lw=1,scale=0.1,figure=fig)
def __init__(self, **hyperparams):
    """Record the hyperparameters and build the wrapped Op estimator."""
    # Keep the raw keyword arguments so they can be inspected later.
    self._hyperparams = hyperparams
    # Forward every hyperparameter verbatim to the underlying operator.
    self._wrapped_model = Op(**hyperparams)
plt.plot(nComponents, plsSvdScores[i, :], lw=3) plt.xlim(1, np.amax(nComponents)) plt.title('PLS SVD accuracy') plt.xlabel('Number of components') plt.ylabel('accuracy') plt.legend(['LR', 'LDA', 'GNB', 'Linear SVM', 'rbf SVM'], loc='lower right') plt.grid(True) if (0): #%% PLS Cannonical nComponents = np.arange(1, nClasses + 1) plsCanScores = np.zeros((5, np.alen(nComponents))) for i, n in enumerate(nComponents): plscan = PLSCanonical(n_components=n) plscan.fit(Xtrain, Ytrain) XtrainT = plscan.transform(Xtrain) XtestT = plscan.transform(Xtest) plsCanScores[:, i] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest) plscan = PLSCanonical(n_components=2) plscan.fit(Xtrain, Ytrain) xt = plscan.transform(Xtrain) fig = plt.figure() util.plotData(fig, xt, labelsTrain, classColors) plt.title('First 2 components of projected data') #%% Plot accuracies for PLSSVD plt.figure()
from sklearn import datasets import numpy as np from sklearn.model_selection import train_test_split from sklearn.cross_decomposition import PLSCanonical from sklearn.neighbors import KNeighborsClassifier import math from mlxtend.feature_selection import SequentialFeatureSelector as SFS dataSet = datasets.load_digits() data = dataSet["data"] target = dataSet["target"] plsca = PLSCanonical(n_components=2) plsca.fit(data, target) X_train_r, Y_train_r = plsca.transform(data, target) knn = math.sqrt(len(X_train_r)) knn = KNeighborsClassifier(n_neighbors=int(knn)) Y_train_r = [int(Y_train_r[i]) for i in range(0, len(Y_train_r))] k = knn.fit(X_train_r, Y_train_r) print(k.score(X_train_r, Y_train_r)) knn = KNeighborsClassifier(n_neighbors=4) sfs = SFS(knn, k_features=3, forward=True, floating=False, verbose=2,
def _create_model(self):
    """Build and return a fresh PLSCanonical estimator with default settings."""
    model = PLSCanonical()
    return model
for i in range (5): plt.plot(nComponents,plsSvdScores[i,:],lw=3) plt.xlim(1,np.amax(nComponents)) plt.title('PLS SVD accuracy') plt.xlabel('Number of components') plt.ylabel('accuracy') plt.legend (['LR','LDA','GNB','Linear SVM','rbf SVM'],loc='lower right') plt.grid(True) if (0): #%% PLS Cannonical nComponents = np.arange(1,nClasses+1) plsCanScores = np.zeros((5,np.alen(nComponents))) for i,n in enumerate(nComponents): plscan = PLSCanonical(n_components=n) plscan.fit(Xtrain,Ytrain) XtrainT = plscan.transform(Xtrain) XtestT = plscan.transform(Xtest) plsCanScores[:,i] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest) plscan = PLSCanonical(n_components=2) plscan.fit(Xtrain,Ytrain) xt = plscan.transform(Xtrain) fig = plt.figure() util.plotData(fig,xt,labelsTrain,classColors) plt.title('First 2 components of projected data') #%% Plot accuracies for PLSSVD
def fit_scikit_pls(self, train_set_1, train_set_2):
    """Fit a two-view PLSCanonical model (no per-feature scaling) on the two
    training views, cache the model plus its scores and weights on self,
    and return self to allow call chaining."""
    pls = PLSCanonical(n_components=self.latent_dims, scale=False)
    pls.fit(train_set_1, train_set_2)
    self.PLS = pls
    # Latent scores and projection weights for each view, in (x, y) order.
    self.score_list = [pls.x_scores_, pls.y_scores_]
    self.weights_list = [pls.x_weights_, pls.y_weights_]
    return self
def __init__(self, allow_missing_values=False):
    """Initialise the base predictor with a default-configured PLSCanonical
    as the classic estimator."""
    estimator = PLSCanonical()
    super(MultiCurvePlsPredictor, self).__init__(
        classic_estimator=estimator,
        allow_missing_values=allow_missing_values,
    )
# Check kNN accuracy on a test set reduced to a PLS-derived feature subspace.
# k is taken as the square root of the number of objects in the data set.
# Which number of components gives the best result?
from sklearn import datasets
from sklearn import model_selection
from sklearn.neighbors import KNeighborsClassifier
import math
from sklearn.cross_decomposition import PLSCanonical

mnist_dataset = datasets.load_digits()
X = mnist_dataset.data
Y = mnist_dataset.target
target_names = mnist_dataset.target_names
train, test, train_targets, test_targets = model_selection.train_test_split(
    X, Y, train_size=0.5, test_size=0.5)

best_score = 0          # renamed from 'max' -- do not shadow the builtin
best_n_components = 0
for i in range(1, 10):
    # Fit the projection on the TRAINING split only. The original refitted
    # PLSCanonical on the test split (plsca.fit(test, test_targets)) before
    # transforming it, which leaks test labels into the projection.
    plsca = PLSCanonical(n_components=i)
    plsca.fit(train, train_targets)
    X_r = plsca.transform(train)
    Y_r = plsca.transform(test)
    # k = sqrt(total number of objects), uniform weights, Euclidean metric.
    clf = KNeighborsClassifier(round(math.sqrt(X.shape[0])),
                               weights="uniform", metric="euclidean")
    clf.fit(X_r, train_targets)
    score = clf.score(Y_r, test_targets)  # computed once, reused below
    print(i, ":", score)
    if best_score < score:
        best_score = score
        best_n_components = i
print("Best result for:", best_n_components)
def get_algorithm(self):
    '''
    Resolve self.algorithmName (Sklearn naming conventions) to a freshly
    constructed regressor instance.

    Available keys:
        ARDRegression | AdaBoostRegressor | BaggingRegressor | BayesianRidge |
        CCA | DecisionTreeRegressor | ElasticNet | ExtraTreeRegressor |
        ExtraTreesRegressor | GaussianProcessRegressor |
        GradientBoostingRegressor | HuberRegressor | KNeighborsRegressor |
        KernelRidge | Lars | Lasso | LassoLars | LinearRegression |
        LinearSVR | MLPRegressor | NuSVR | OrthogonalMatchingPursuit |
        PLSCanonical | PLSRegression | PassiveAggressiveRegressor |
        RANSACRegressor | RandomForestRegressor | Ridge | SGDRegressor |
        SVR | TheilSenRegressor | TransformedTargetRegressor

    Currently not supporting:
        ElasticNetCV | LarsCV | LassoCV | LassoLarsCV | LassoLarsIC |
        MultiTaskElasticNet | MultiTaskElasticNetCV | MultiTaskLasso |
        MultiTaskLassoCV | OrthogonalMatchingPursuitCV | RidgeCV |
        RadiusNeighborsRegressor

    Outputs: the estimator instance, or None when the name is unknown.

    Notes:
        Scoring Metrics:
        https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    '''
    # Name -> estimator class. Instantiation is deferred to the lookup so
    # only the requested regressor is ever constructed (same behaviour as
    # the original if/elif chain).
    registry = {
        "ARDRegression": ARDRegression,
        "AdaBoostRegressor": AdaBoostRegressor,
        "BaggingRegressor": BaggingRegressor,
        "BayesianRidge": BayesianRidge,
        "CCA": CCA,
        "DecisionTreeRegressor": DecisionTreeRegressor,
        "ElasticNet": ElasticNet,
        "ExtraTreeRegressor": ExtraTreeRegressor,
        "ExtraTreesRegressor": ExtraTreesRegressor,
        "GaussianProcessRegressor": GaussianProcessRegressor,
        "GradientBoostingRegressor": GradientBoostingRegressor,
        "HuberRegressor": HuberRegressor,
        "KNeighborsRegressor": KNeighborsRegressor,
        "KernelRidge": KernelRidge,
        "Lars": Lars,
        "Lasso": Lasso,
        "LassoLars": LassoLars,
        "LinearRegression": LinearRegression,
        "LinearSVR": LinearSVR,
        "MLPRegressor": MLPRegressor,
        "NuSVR": NuSVR,
        "OrthogonalMatchingPursuit": OrthogonalMatchingPursuit,
        "PLSCanonical": PLSCanonical,
        "PLSRegression": PLSRegression,
        "PassiveAggressiveRegressor": PassiveAggressiveRegressor,
        "RANSACRegressor": RANSACRegressor,
        "RandomForestRegressor": RandomForestRegressor,
        "Ridge": Ridge,
        "SGDRegressor": SGDRegressor,
        "SVR": SVR,
        "TheilSenRegressor": TheilSenRegressor,
        "TransformedTargetRegressor": TransformedTargetRegressor,
    }
    estimator_cls = registry.get(self.algorithmName)
    if estimator_cls is None:
        # Unknown name -- mirror the original fall-through behaviour.
        return None
    return estimator_cls()
# Accumulators for subject IDs and per-subject graph features.
ursis = []
graph_features = []
# label_col = df1.loc['URSI']
# print('label col:')
# print(label_col)
# First column of the data frame holds the subject URSI identifiers
# (presumably -- confirm against the frame's schema upstream).
ursi_ids = df1.iloc[:, 0]
# Candidate regressors, constructed up front; fitting happens later.
linreg = LinearRegression(normalize=True)
lasso = Lasso(fit_intercept=True, normalize=True)
ransac = RANSACRegressor()
pls = PLSRegression()
cca = CCA()
pls_ca = PLSCanonical()
rf = RandomForestRegressor(n_estimators=50, n_jobs=4)
gp = GaussianProcessRegressor()
ir = IsotonicRegression()
svr_lin = SVR(kernel='linear')
svr_rbf = SVR()
# Only this subset takes part in the comparison; ransac, cca, pls_ca and ir
# above are constructed but never added to the list.
classifiers = [linreg, lasso, pls, svr_lin, svr_rbf, rf, gp]
classifier_names = ['LR', 'Lasso', 'PLS', 'SVR (lin)', 'SVR (rbf)', 'RF', 'GP']
prediction_targets = ['all', 'CCI']
# Result stores keyed per target -- NOTE(review): presumably r holds
# correlation values and mse mean-squared errors; verify in the code below.
r = {}
mse = {}
# correct not accurate
from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical

# Features: every column except the label column 'tag'.
# Targets: every column except the kinematic/weight measurement columns.
data = pd.read_csv('newdata.csv')
features = data.drop(['tag'], axis=1)
targets = data.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'],
                    axis=1)

train_x, test_x, train_y, test_y = train_test_split(features, targets,
                                                    random_state=5)

# Fit both PLS variants on the training split, then print each model's score
# on the held-out split; the regression score prints first, matching the
# original output order.
models = (PLSRegression(), PLSCanonical())
for estimator in models:
    estimator.fit(train_x, train_y)
for estimator in models:
    print(estimator.score(test_x, test_y))
def GetAllModelsForComparison(X_train, Y_train):
    """Return a mapping of estimator name -> freshly constructed sklearn object.

    X_train and Y_train are accepted for interface compatibility but are not
    used: nothing is fitted here.

    The original dict literal contained many duplicate keys (e.g.
    'BaseEstimator', 'ClassifierMixin', 'RegressorMixin', 'SGDClassifier',
    'SGDRegressor', 'LabelBinarizer', 'Parallel', 'MetaEstimatorMixin',
    'TransformerMixin'); in a Python dict literal later duplicates silently
    overwrite earlier ones, so removing them leaves the returned mapping
    unchanged while avoiding the redundant constructions.

    NOTE(review): several entries (Hinge, Log, ModifiedHuber, SquaredLoss)
    are SGD loss objects rather than estimators, and others (BaseEstimator,
    the mixins, LabelBinarizer, Parallel, StandardScaler) are not predictors;
    they are kept verbatim for backward compatibility.
    """
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ClassifierChain': ClassifierChain(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'IsotonicRegression': IsotonicRegression(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor()
    }
    return models
def test_sanity_check_pls_canonical_random():
    # Sanity check for PLSCanonical on random data
    # The results were checked against the R-package plspm
    n = 500
    p_noise = 10
    q_noise = 5
    # 2 latents vars:
    rng = check_random_state(11)
    l1 = rng.normal(size=n)
    l2 = rng.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    # Both views share the same latent structure plus independent noise,
    # so PLS should recover the latent directions in its first components.
    X = latents + rng.normal(size=4 * n).reshape((n, 4))
    Y = latents + rng.normal(size=4 * n).reshape((n, 4))
    # Append pure-noise columns that carry no cross-covariance.
    X = np.concatenate((X, rng.normal(size=p_noise * n).reshape(n, p_noise)),
                       axis=1)
    Y = np.concatenate((Y, rng.normal(size=q_noise * n).reshape(n, q_noise)),
                       axis=1)
    pls = PLSCanonical(n_components=3)
    pls.fit(X, Y)
    # Reference weights/loadings obtained from the R plspm package.
    expected_x_weights = np.array([
        [0.65803719, 0.19197924, 0.21769083],
        [0.7009113, 0.13303969, -0.15376699],
        [0.13528197, -0.68636408, 0.13856546],
        [0.16854574, -0.66788088, -0.12485304],
        [-0.03232333, -0.04189855, 0.40690153],
        [0.1148816, -0.09643158, 0.1613305],
        [0.04792138, -0.02384992, 0.17175319],
        [-0.06781, -0.01666137, -0.18556747],
        [-0.00266945, -0.00160224, 0.11893098],
        [-0.00849528, -0.07706095, 0.1570547],
        [-0.00949471, -0.02964127, 0.34657036],
        [-0.03572177, 0.0945091, 0.3414855],
        [0.05584937, -0.02028961, -0.57682568],
        [0.05744254, -0.01482333, -0.17431274],
    ])
    expected_x_loadings = np.array([
        [0.65649254, 0.1847647, 0.15270699],
        [0.67554234, 0.15237508, -0.09182247],
        [0.19219925, -0.67750975, 0.08673128],
        [0.2133631, -0.67034809, -0.08835483],
        [-0.03178912, -0.06668336, 0.43395268],
        [0.15684588, -0.13350241, 0.20578984],
        [0.03337736, -0.03807306, 0.09871553],
        [-0.06199844, 0.01559854, -0.1881785],
        [0.00406146, -0.00587025, 0.16413253],
        [-0.00374239, -0.05848466, 0.19140336],
        [0.00139214, -0.01033161, 0.32239136],
        [-0.05292828, 0.0953533, 0.31916881],
        [0.04031924, -0.01961045, -0.65174036],
        [0.06172484, -0.06597366, -0.1244497],
    ])
    expected_y_weights = np.array([
        [0.66101097, 0.18672553, 0.22826092],
        [0.69347861, 0.18463471, -0.23995597],
        [0.14462724, -0.66504085, 0.17082434],
        [0.22247955, -0.6932605, -0.09832993],
        [0.07035859, 0.00714283, 0.67810124],
        [0.07765351, -0.0105204, -0.44108074],
        [-0.00917056, 0.04322147, 0.10062478],
        [-0.01909512, 0.06182718, 0.28830475],
        [0.01756709, 0.04797666, 0.32225745],
    ])
    expected_y_loadings = np.array([
        [0.68568625, 0.1674376, 0.0969508],
        [0.68782064, 0.20375837, -0.1164448],
        [0.11712173, -0.68046903, 0.12001505],
        [0.17860457, -0.6798319, -0.05089681],
        [0.06265739, -0.0277703, 0.74729584],
        [0.0914178, 0.00403751, -0.5135078],
        [-0.02196918, -0.01377169, 0.09564505],
        [-0.03288952, 0.09039729, 0.31858973],
        [0.04287624, 0.05254676, 0.27836841],
    ])
    # The sign of each component is arbitrary, hence the abs() comparison
    # plus the sign-flip consistency checks below.
    assert_array_almost_equal(np.abs(pls.x_loadings_),
                              np.abs(expected_x_loadings))
    assert_array_almost_equal(np.abs(pls.x_weights_),
                              np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_loadings_),
                              np.abs(expected_y_loadings))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_weights))
    # For each component, loadings and weights must flip sign together.
    x_loadings_sign_flip = np.sign(pls.x_loadings_ / expected_x_loadings)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    y_loadings_sign_flip = np.sign(pls.y_loadings_ / expected_y_loadings)
    assert_array_almost_equal(x_loadings_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_loadings_sign_flip, y_weights_sign_flip)
    # Canonical (mode A) PLS yields orthogonal weights and scores.
    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)
    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)
tol=tol, shuffle=True, verbose=0, epsilon=0.1, random_state=random_state, learning_rate='invscaling', eta0=0.01, power_t=0.25, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, warm_start=False, average=False), PLSCanonical(n_components=9, scale=False, algorithm='nipals', max_iter=1000, tol=1e-3, copy=True), CCA(n_components=9, scale=False, max_iter=1000, tol=1e-3, copy=True), MLPRegressor(hidden_layer_sizes=(500, 30), activation='relu', solver='lbfgs', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.00000001, power_t=0.5, max_iter=100000, shuffle=True, random_state=random_state, tol=tol,
def training_lda_TD4_intra(my_clfs, trains, classes, **kw):
    """Intra-subject LDA evaluation over several training strategies:
    (1) per-position cross-validation, (2) train on centre position S0 and
    test on each shifted position, (3) joint S0+shift training with k-fold
    CV, and (4) the same joint strategy with a CCA/PLS subspace alignment.

    Required **kw keys (as read below): 'chan_len', 'action_num',
    'feature_type', 'pos_list'; optional 'log_fold'.
    """
    start_time = time.time()
    # NOTE(review): if 'log_fold' is absent, log_fold is never bound but is
    # still used by log_result() at the end -- that would raise NameError.
    if(kw.has_key('log_fold')):
        log_fold = root_path + '/result/' + kw['log_fold']
        new_fold(log_fold)
    chan_len = kw['chan_len']      # feature columns per electrode position
    action_num = kw['action_num']  # number of action/gesture classes
    cv = 3
    results = []
    results.append(
        ['Feat', 'Algorithm', 'n_components', 'Channel_Pos', 'Accuracy', 'std'])
    log_file = 'feat_'+kw['feature_type']+'_intra'
    clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None,
                          n_components=None, store_covariance=False,
                          tol=0.0001)
    # Samples per action; Python 2 '/' on ints is floor division.
    data_num = trains.shape[0]/action_num
    # Baseline: cross-validated LDA on all positions' features at once.
    scores = sklearn.cross_validation.cross_val_score(clf, trains, classes,
                                                      cv=cv)
    results.append(['feat_TD4_cv_'+str(cv), 'lda', 'ALL', 0,
                    scores.mean(), scores.std()])

    # Intra-position training strategy: 9 groups of data (one per position).
    print '组内训练.............'
    for idx, channel_pos in enumerate(kw['pos_list']):
        # print '----training TD4 intra , channel_pos: ', channel_pos,'......'
        trains_intra = trains[:, idx*chan_len: idx*chan_len+chan_len]
        scores = sklearn.cross_validation.cross_val_score(
            clf, trains_intra, classes, cv=cv)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0, channel_pos,
                        scores.mean(), scores.std()])

    # Centre-position training strategy: fit on S0, score on each shift.
    print '中心训练策略.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0': continue
        tests_shift = trains[:, idx*chan_len: idx*chan_len+chan_len]
        # if channel_pos == 'L2':
        #     print idx*chan_len, idx*chan_len+chan_len, tests_shift.shape, trains.shape
        #     sys.exit(0)
        scores = clf.fit(trains_intra_S0, classes).score(tests_shift, classes)
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0,
                        'train S0' + ' test ' + channel_pos,
                        scores.mean(), scores.std()])

    # Group training strategy (distinct from the intra-position strategy),
    # k-fold: train on S0 plus the shifted position, test on the shift.
    print '组训练策略.............'
    trains_intra_S0 = trains[:, 0:chan_len]
    kf = KFold(data_num, n_folds=cv)
    for idx, channel_pos in enumerate(kw['pos_list']):
        if channel_pos == 'S0': continue
        itera = cv
        scores = np.zeros( (itera,) )
        # stds = np.zeros( (itera,) )
        itera -= 1  # fold results are stored from the highest index down
        trains_shift = trains[:, idx*chan_len: idx*chan_len+chan_len]
        for train_idx, test_idx in kf:
            train_idx_all = np.array([], np.int)
            test_idx_all = np.array([], np.int)
            # NOTE(review): multiplying fold indices by (action_idx+1) scales
            # them rather than offsetting into per-action blocks (and repeats
            # index 0) -- looks wrong; confirm intent.
            for action_idx in range(action_num):
                train_idx_all = np.concatenate(
                    (train_idx_all, train_idx*(action_idx+1)), axis=0)
                test_idx_all = np.concatenate(
                    (test_idx_all, test_idx*(action_idx+1)), axis=0)
            X_train = np.concatenate(
                (trains_intra_S0[train_idx_all], trains_shift[train_idx_all]),
                axis=0)
            y_train = np.concatenate(
                (classes[train_idx_all], classes[train_idx_all]), axis=0)
            X_test = trains_shift[test_idx_all]
            y_test = classes[test_idx_all]
            # X_test = trains_shift
            # y_test = classes
            score = clf.fit(X_train, y_train).score(X_test, y_test)
            scores[itera] = score.mean()
            itera -= 1
        # print scores
        results.append(['feat_TD4_cv_'+str(cv), 'lda', 0,
                        'S0 + '+channel_pos,
                        np.mean(scores), np.std(scores)])

    # CCA-based training strategy with k-fold cross-validation: align the
    # shifted features with the S0 space via PLSCanonical first.
    print 'CCA训练策略.............'
    trains_S0 = trains[:, 0:chan_len]
    n_components_list = [6, 8, 10, 12, 14, 16]  # subspace dimensionalities
    # n_components_list = [12,14,16]
    kf = KFold(data_num, n_folds=cv)
    for n_components in n_components_list:
        for idx, channel_pos in enumerate(kw['pos_list']):
            if channel_pos == 'S0': continue
            itera = cv
            scores = np.zeros( (itera,) )
            stds = np.zeros( (itera,) )
            itera -= 1
            trains_shift = trains[:, idx*chan_len: idx*chan_len+chan_len]
            for train_idx, test_idx in kf:
                train_idx_all = np.array([], np.int)
                test_idx_all = np.array([], np.int)
                for action_idx in range(action_num):
                    train_idx_all = np.concatenate(
                        (train_idx_all, train_idx*(action_idx+1)), axis=0)
                    test_idx_all = np.concatenate(
                        (test_idx_all, test_idx*(action_idx+1)), axis=0)
                # print train_idx_all.shape, train_idx_all, test_idx_all.shape, test_idx_all
                # plsca.fit(trains_shift[train_idx_all], trains_S0[train_idx_all])
                # NOTE(review): the transform is fitted on ALL samples, not
                # just the training fold -- test-fold information leaks into
                # the projection (the fold-restricted fit is commented out).
                plsca = PLSCanonical(n_components=n_components)
                plsca.fit(trains_shift, trains_S0)
                trains_shift_cca, trains_S0_cca = plsca.transform(
                    trains_shift, trains_S0)
                X_trains = np.concatenate(
                    (trains_S0_cca, trains_shift_cca[train_idx_all]), axis=0)
                y_trains = np.concatenate(
                    (classes, classes[train_idx_all]), axis=0)
                score = clf.fit(X_trains, y_trains).score(
                    trains_shift_cca[test_idx_all], classes[test_idx_all])
                scores[itera] = score.mean()
                # stds[itera] = score.std()
                itera -= 1
            results.append(['feat_TD4_cv_'+str(cv), 'lda_cca', n_components,
                            'S0 + '+channel_pos,
                            np.mean(scores), np.std(scores)])
    log_result(results,
               log_fold + '/' + log_file + '_action_1-'+str(action_num), 2)
    print '----Log Fold:', log_fold, ', log_file: ', log_file + '_action_1-'+str(action_num)
    print '----training TD4 time elapsed:', time.time() - start_time
def training_lda_TD4_inter(my_clfs, trains_S0, trains_shift, classes, **kw): print 'training_lda_TD4_inter.........' start_time = time.time() log_fold = root_path + '/result/' + kw['log_fold'] new_fold(log_fold) chan_len = kw['chan_len'] action_num = kw['action_num'] print "----training "+kw['feature_type']+" inter, training by position O, testing by electrode shift " cv = 5 results = [] results.append(['Feat', 'Algorithm','Channel_Pos', 'Accuracy', 'std']) log_file = 'feat_'+kw['feature_type']+'_inter' clf = sklearn.lda.LDA(solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=0.0001) data_num = trains_S0.shape[0]/action_num # print data_num scores = sklearn.cross_validation.cross_val_score( clf, trains_S0, classes, cv=cv) results.append(['feat_TD4_cv_'+str(cv), 'lda', 'S0', scores.mean(), scores.std()]) kf = KFold(data_num, n_folds=cv) for idx, channel_pos in enumerate(kw['pos_list']): X_test = trains_shift[:,idx*chan_len:idx*chan_len+chan_len] y_test = classes iteration = cv scores = np.zeros((iteration,)) cca_scores = np.zeros((iteration,)) iteration -= 1 for train_idx, test_idx in kf: train_idx_all = np.array([], np.int) test_idx_all = np.array([], np.int) for action_idx in range(action_num): train_idx_all = np.concatenate( (train_idx_all, train_idx*action_idx), axis=0) test_idx_all = np.concatenate( (test_idx_all, test_idx*action_idx), axis=0) # X_train, y_train = trains_S0[train_idx_all], classes[train_idx_all] X_train, y_train = trains_S0, classes X_train_shift, y_train_shift = X_test[train_idx_all], classes[train_idx_all] X_train_all = np.concatenate( (X_train, X_train_shift), axis=0) y_train_all = np.concatenate( (y_train, y_train_shift), axis=0) sys.exit(0) score_inter = clf.fit(X_train_all, y_train_all).score(X_test, y_test) scores[iteration] = score_inter.mean() # print X_train.shape, y_train.shape if channel_pos != 'S0': # plsca = joblib.load(transform_fold+'/cca_transform_'+kw['subject']+'_'+channel_pos+'.model') 
plsca = PLSCanonical(n_components=14) # print X_test.shape, X_train.shape # sys.exit(0) plsca.fit(X_test[train_idx], X_train) X_test_cca, X_train_cca = plsca.transform(X_test, X_train) cca_score = clf.fit(X_train_cca, y_train).score(X_test_cca, y_test) cca_scores[iteration] = cca_score.mean() iteration -= 1 # print scores # print cca_scores # sys.exit(0) results.append(['feat_TD4', 'lda', channel_pos, np.mean(scores), np.std(scores)]) results.append(['feat_TD4', 'lda_cca', channel_pos, np.mean(cca_scores), np.std(cca_scores)]) log_result(results, log_fold + '/' + log_file + '_' + str(kw['num']), 2) print '----Log Fold:', log_fold, ', log_file: ', log_file + '_' + channel_pos + '_' + str(kw['num']) print '----training TD4 time elapsed:', time.time() - start_time # mean_shift = 0 # std_shift = 0 # for i in range(2, 10): # mean_shift += results[i][4] # std_shift += results[i][5] # mean_shift /= 9 # std_shift /= 9 # results.append(['feat_TD4','lda(svd;tol=0.0001)', 'Shift_means', '1.0', mean_shift, std_shift]) # mean_all = 0 # std_all = 0 # for i in range(1, 10): # mean_all += results[i][4] # std_all += results[i][5] # mean_all /= 9 # std_all /= 9
temp4 = [] for e in temp1[:]: temp4.append(e) for e in temp2[:]: temp4.append(e) if len(temp4) == 600 and len(temp3) == 300: x_n.append(temp4) y_n.append(temp3) npx = np.asarray(x, dtype=np.float64) npy = np.asarray(y, dtype=np.float64) npxn = np.asarray(x_n, dtype=np.float64) npyn = np.asarray(y_n, dtype=np.float64) cca = PLSCanonical(n_components=2) cca.fit_transform(npx, npy) npx, npy = cca.transform(npx, npy) npxn, npyn = cca.transform(npxn, npyn) pls.fit(npx, npy) params = pls.get_params(deep=True) print(params) pls.set_params(**params) y_score = pls.predict(npxn) sim_count = 0 tol = 0.1 for index in range(len(y_score)):
X_train = X[:n // 2] Y_train = Y[:n // 2] X_test = X[n // 2:] Y_test = Y[n // 2:] print("Corr(X)") print(np.round(np.corrcoef(X.T), 2)) print("Corr(Y)") print(np.round(np.corrcoef(Y.T), 2)) # ############################################################################# # Canonical (symmetric) PLS # Transform data # ~~~~~~~~~~~~~~ plsca = PLSCanonical(n_components=2) plsca.fit(X_train, Y_train) X_train_r, Y_train_r = plsca.transform(X_train, Y_train) X_test_r, Y_test_r = plsca.transform(X_test, Y_test) # Scatter plot of scores # ~~~~~~~~~~~~~~~~~~~~~~ # 1) On diagonal plot X vs Y scores on each components plt.figure(figsize=(12, 8)) plt.subplot(221) plt.scatter(X_train_r[:, 0], Y_train_r[:, 0], label="train", marker="o", c="b", s=25)
linear_model.LassoLarsCV(), linear_model.LassoLarsIC(), linear_model.LinearRegression(), LinearSVR(), #linear_model.LogisticRegression(), #linear_model.LogisticRegressionCV(), MLPRegressor(), #linear_model.ModifiedHuber(), #linear_model.MultiTaskElasticNet(), #linear_model.MultiTaskElasticNetCV(), #linear_model.MultiTaskLasso(), #linear_model.MultiTaskLassoCV(), NuSVR(), linear_model.OrthogonalMatchingPursuit(), linear_model.OrthogonalMatchingPursuitCV(), PLSCanonical(), PLSRegression(), linear_model.PassiveAggressiveRegressor(), linear_model.RANSACRegressor(), RadiusNeighborsRegressor(), RandomForestRegressor(), #linear_model.RandomizedLasso(), #linear_model.RandomizedLogisticRegression(), linear_model.RANSACRegressor(), linear_model.Ridge(), linear_model.RidgeCV(), linear_model.SGDRegressor(), SVR(), linear_model.TheilSenRegressor() ]