from sklearn.cross_decomposition import PLSRegression


class MyPLS:
    """Thin wrapper around PLSRegression that flattens predictions and exposes a zero intercept."""

    def __init__(self, n_components=2, scale=True, max_iter=500, tol=1e-06, copy=True):
        # Pass hyperparameters by keyword; recent scikit-learn versions make them keyword-only.
        self.pls = PLSRegression(n_components=n_components, scale=scale,
                                 max_iter=max_iter, tol=tol, copy=copy)

    def fit(self, X, Y):
        self.pls.fit(X, Y)
        return self.pls

    def predict(self, X, copy=True):
        # Flatten so callers get a 1-D array instead of an (n_samples, 1) matrix.
        return self.pls.predict(X, copy=copy).flatten()

    def score(self, X, Y, sample_weight=None):
        return self.pls.score(X, Y, sample_weight=sample_weight)

    def get_params(self, deep=True):
        return self.pls.get_params(deep=deep)

    def set_params(self, **parameters):
        self.pls.set_params(**parameters)
        return self

    @property
    def intercept_(self):
        return 0

    @property
    def coeff_(self):
        return self.pls.coef_
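# A minimal usage sketch for the MyPLS wrapper above; the data is synthetic and
# the names (X_demo, Y_demo, model) are illustrative only, not part of the original snippet.
import numpy as np

X_demo = np.random.rand(50, 6)
Y_demo = X_demo @ np.random.rand(6, 1)

model = MyPLS(n_components=2)
model.fit(X_demo, Y_demo)
preds = model.predict(X_demo)        # 1-D array thanks to the flatten() in predict()
print(model.score(X_demo, Y_demo))   # R^2 of the underlying PLSRegression
print(model.coeff_.shape)            # coefficients exposed through the coeff_ property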
import pickle

import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import train_test_split


def PLSReg_loop(X, y, params, number_rnd):
    """Fit PLSRegression over a grid of n_components/max_iter and random splits, saving scores and loadings."""
    row = 0
    results = pd.DataFrame(columns=['n_components', 'max_iter', 'random_state', 'r2_test', 'r2_train'])
    n_components = params['n_components']
    max_iter = params['max_iter']
    random_states = np.random.randint(0, 10000, number_rnd)
    x_load = []
    y_load = []
    for n in n_components:
        for m in max_iter:
            x_loc = []
            y_loc = []
            for r in random_states:
                print('PARAMS n_components: {}, max_iter: {}, random_state: {}'.format(n, m, r))
                plsr = PLSRegression()
                plsr.set_params(**{'n_components': n, 'max_iter': m})
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=r)
                plsr.fit(X_train, y_train)
                x_loc += [plsr.x_loadings_]
                y_loc += [plsr.y_loadings_]
                r2_test = plsr.score(X_test, y_test)
                r2_train = plsr.score(X_train, y_train)
                results.loc[row] = [n, m, r, r2_test, r2_train]
                row += 1
            x_load += [np.array(x_loc)]
            y_load += [np.array(y_loc)]
    x_load = np.array(x_load)
    y_load = np.array(y_load)
    # Persist the R^2 table and the collected loadings under the prefix in params['name'].
    results.to_csv(params['name'] + '_results')
    with open(params['name'] + '_x_load', 'wb') as f:
        pickle.dump(x_load, f)
    with open(params['name'] + '_y_load', 'wb') as f:
        pickle.dump(y_load, f)
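# A hypothetical call to PLSReg_loop above; the params keys ('n_components',
# 'max_iter', 'name') mirror how the function reads them, and the data is synthetic.
import numpy as np

X_demo = np.random.rand(200, 10)
y_demo = np.random.rand(200, 1)
demo_params = {
    'n_components': [2, 3, 4],
    'max_iter': [500],
    'name': 'pls_demo',   # prefix for the CSV and pickle files the function writes
}
PLSReg_loop(X_demo, y_demo, demo_params, number_rnd=5)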
import numpy as np
from scipy import spatial
from sklearn.cross_decomposition import PLSCanonical, PLSRegression

# Tail of a loop (omitted in this snippet) that builds the training lists l_p, l_c
# and the test lists l_p_t, l_c_t.
l_p_t.append(vec_p)
l_c_t.append(vec_c)
j += 1

# Convert the input lists to arrays.
sorted_p = np.asarray(l_p)
sorted_c = np.asarray(l_c)

# Fit PLSCanonical on the training views and project both train and test sets
# into the shared latent space.
plc = PLSCanonical()
plc.fit_transform(sorted_c, sorted_p)
sorted_c, sorted_p = plc.transform(sorted_c, sorted_p)

sorted_c_test = np.asarray(l_c_t)
sorted_p_test = np.asarray(l_p_t)
sorted_c_test, sorted_p_test = plc.transform(sorted_c_test, sorted_p_test)

# Regress the projected targets on the projected inputs.
plr = PLSRegression()
plr.fit(sorted_c, sorted_p)
params = plr.get_params()
plr.set_params(**params)
y_score = plr.predict(sorted_c_test)

# Count a prediction as a match when its cosine similarity to the true vector is at least 0.85.
sim_count = 0
print("Test Similarity: ")
for i in range(len(y_score)):
    result_sim = 1 - spatial.distance.cosine(y_score[i], sorted_p_test[i])
    if result_sim >= 0.85:
        sim_count += 1
    print("Data " + str(i + 1) + " : " + str(result_sim))
accuracy = float(sim_count) / float(len(y_score))
print("Accuracy: " + str(accuracy))
import math

import numpy as np
from scipy import spatial
from sklearn.cross_decomposition import PLSCanonical

# Tail of a loop (omitted in this snippet) that fills x, y (training) and x_n, y_n (test);
# pls is a PLSRegression instance created earlier in the omitted code.
y_n.append(temp3)

npx = np.asarray(x, dtype=np.float64)
npy = np.asarray(y, dtype=np.float64)
npxn = np.asarray(x_n, dtype=np.float64)
npyn = np.asarray(y_n, dtype=np.float64)

# Fit PLSCanonical on the training views and apply the same projection to the test set.
cca = PLSCanonical(n_components=2)
cca.fit_transform(npx, npy)
npx, npy = cca.transform(npx, npy)
npxn, npyn = cca.transform(npxn, npyn)

pls.fit(npx, npy)
params = pls.get_params(deep=True)
print(params)
pls.set_params(**params)
y_score = pls.predict(npxn)

# A prediction counts as correct when its cosine similarity to the true vector is within tol of 1.
sim_count = 0
tol = 0.1
for index in range(len(y_score)):
    sub_result = np.subtract(y_score, npyn)  # computed but not used below
    result = 1 - spatial.distance.cosine(y_score[index], npyn[index])
    print("similarity of test example " + str(index) + " = " + str(result))
    if (1 - math.fabs(result)) <= tol:
        sim_count += 1
print("Count of correct prediction = " + str(sim_count))
acc = float(sim_count) / float(len(y_score))