class KRR_calibration:
    """Kernel-ridge-regression based probability calibration.

    Fits a KernelRidge model to the observed bias (Y - p) of a base
    predictor so predictions can later be corrected by the local bias.
    """

    def __init__(self):
        # Placeholder until fit() replaces it with a fitted KernelRidge.
        self.model = 'KRR'

    def fit(self, X, p, Y, kernel_function='rbf', **kwargs):
        """Fit the bias model on features X, base predictions p, labels Y."""
        from sklearn.kernel_ridge import KernelRidge
        check_attributes(X, Y)
        self.model = KernelRidge(kernel=kernel_function, **kwargs)
        # The regression target is the observed bias of the base predictor.
        observed_bias = Y - p
        self.model.fit(X, observed_bias)
        return self.model

    def predict(self, X, p=None, mode='prob'):
        """Predict the bias ('bias' mode) or the calibrated probability
        ('prob' mode, requires p).

        Raises:
            ValueError: if mode is unknown, or mode='prob' with p=None.
        """
        if mode == 'bias':
            return self.model.predict(X)
        elif mode == 'prob':
            # Fix: the default mode requires p; fail with a clear message
            # instead of an AttributeError on None.flatten().
            if p is None:
                raise ValueError("p must be provided when mode='prob'.")
            return self.model.predict(X) + p.flatten()
        else:
            raise ValueError("Mode %s is not defined." % mode)
def ridgeReg(X, y):
    """Train a polynomial KernelRidge model on X[:, 6:] and print metrics.

    Reports MSE and R^2 on a held-out 20% split plus the training-set MSE.
    """
    # Drop the first 6 columns (non-feature metadata) before splitting.
    X_train, X_test, y_train, y_test = train_test_split(
        np.array(X)[:, 6:], np.array(y), test_size=0.20, random_state=1)
    regr = KernelRidge(alpha=10, kernel="polynomial", gamma=0.5)
    regr.fit(X_train, y_train)
    y_pred = regr.predict(X_test)
    # Fix: removed a dead loop that only incremented an unused counter.
    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % r2_score(y_test, y_pred))
    # What were the real predictions?
    y_pred_train = regr.predict(X_train)
    print("Mean squared error on the training set: %.2f"
          % mean_squared_error(y_train, y_pred_train))
    print("Mean squared error on the test set: %.2f"
          % mean_squared_error(y_test, y_pred))
    print("size of X = ", str(len(y)))
def choose_krr_alpha(train_x, test_x, train_y, test_y):
    """Tune KernelRidge alpha on a held-out set and plot score vs. alpha."""
    alphas = [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    alpha_scores = []
    best_a_score = 0.0
    best_a = ""
    for a in alphas:
        krr = KernelRidge(kernel="laplacian", alpha=a)
        krr.fit(train_x, train_y)
        # Fix: removed a discarded krr.predict(test_x) call; score()
        # already predicts internally.
        score = krr.score(test_x, test_y)
        if score > best_a_score:
            best_a_score = score
            best_a = a
        alpha_scores.append(score)
    print(alpha_scores)
    print("Best alpha: " + str(best_a))
    print("Score received: " + str(best_a_score))
    plt.plot(alphas, alpha_scores)
    plt.xlabel('Alpha')
    plt.ylabel('Score')
    plt.title('Tuning Alpha Hyperparameter for KRR')
    plt.show()
def kernel_ridge(trainData_x, trainData_y, testData_x, testData_y):
    """Fit a default KernelRidge model and report results on both splits."""
    # Kernel ridge regression
    model = KernelRidge().fit(trainData_x, trainData_y)
    test_predictions = model.predict(testData_x)
    train_predictions = model.predict(trainData_x)
    results(testData_y, test_predictions,
            trainData_y, train_predictions, "KernelRidge")
def choose_krr_gamma(train_x, test_x, train_y, test_y):
    """Tune KernelRidge gamma on a held-out set and plot score vs. gamma."""
    gammas = [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    gamma_scores = []
    best_g_score = 0.0
    best_g = ""
    for g in gammas:
        krr = KernelRidge(kernel="laplacian", gamma=g)
        krr.fit(train_x, train_y)
        # Fix: removed a discarded krr.predict(test_x) call; score()
        # already predicts internally.
        score = krr.score(test_x, test_y)
        if score > best_g_score:
            best_g_score = score
            best_g = g
        gamma_scores.append(score)
    print(gamma_scores)
    print("Best gamma: " + str(best_g))
    print("Score received: " + str(best_g_score))
    plt.plot(gammas, gamma_scores)
    plt.xlabel('Gamma')
    plt.ylabel('Score')
    plt.title('Tuning Gamma Hyperparameter for KRR')
    plt.show()
def prin(X,y,file,dic): t=100 #clf = MLPRegressor(solver=dic['solver'],activation=dic['activation'],hidden_layer_sizes=eval(dic['hls']), batch_size = dic['batch_size'], max_iter=dic['max_iter']) #clf = LinearRegression() clf=KernelRidge(alpha=0.001,kernel='laplacian',degree=18) X_train, X_test, y_train, y_test= cross_validation.train_test_split(X,y,test_size=float(dic['test_size'])) clf.fit(X_train, y_train) print 'Training size',len(X_train) print 'Testing size',len(X_test) #scores = cross_val_score(clf, X, y, cv=5) #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) accuracy = clf.score(X_train,y_train) print 'accuracy',accuracy,'\n' print 'RMSE',math.sqrt(metrics.mean_squared_error(y_test,clf.predict(X_test))) MAE=metrics.mean_absolute_error(y_test,clf.predict(X_test)) print 'MAE',MAE #X_test,y_test=X[-t:],y[-t:] #file=file[-t:] pr=clf.predict(X_test) print 'Filename Percentage Error Actual Value Predicted Value Difference\n' for i in range (len(y_test)): if y_test[i]==0.0: y_test[i]=0.0000001 predi=str(round(((pr[i]-y_test[i])/y_test[i])*100,2))+' %' print file[i]+' '*(20-len(file[i])),' '*(20-len(predi))+ predi, ' '*(20-len(str(y_test[i])))+str(y_test[i]) , ' '*(20-len(str(round(pr[i],2))))+str(round(pr[i],2)),' '*(20-len(str(round((y_test[i]-pr[i]),4))))+str(round((y_test[i]-pr[i]),4)) #print 'Mean square Error',mean_squared_error(X,pr) #print 'R2 score',r2_score(X,pr) #test(X,y,file,clf.coef_[0],clf.intercept_[0]) #plot_g(clf) return MAE
def KRR_CV(self, trainX, testX, trainY, testY):
    # Grid-search KernelRidge hyperparameters (kernel, inverse gamma, alpha)
    # by k-fold CV on the training set, refit with the best combination,
    # and return the element-wise absolute errors on the test set.
    kernel_vals = ['rbf', 'laplacian']
    kernel_indices = [0,1]  # numeric codes so kernels can live in cv_errors
    inverse_gamma_vals = [1.0, 10.0, 20.0, 40.0, 80.0]
    alpha_vals = [0.0001, 0.001, 0.01, 0.1, 1.0]
    # Rows of (kernel code, inverse gamma, alpha, mean CV error).
    cv_errors = np.empty([len(kernel_vals)*len(inverse_gamma_vals)*len(alpha_vals), 4])
    i = 0
    for kern in kernel_vals:
        for g in inverse_gamma_vals:
            for a in alpha_vals:
                errors = np.empty([self.cv_split_no, 1])
                kf = KFold(n_splits=self.cv_split_no, random_state=30, shuffle=True)
                j = 0
                for train_indices, validation_indices in kf.split(trainX):
                    training_set_X, validation_set_X = trainX[train_indices], trainX[validation_indices]
                    training_set_Y, validation_set_Y = trainY[train_indices], trainY[validation_indices]
                    regr = KernelRidge(alpha=a, gamma=1.0/g, kernel=kern)
                    regr.fit(training_set_X, training_set_Y)
                    predY = regr.predict(validation_set_X)
                    errorY = np.absolute(predY - validation_set_Y)
                    # Mean absolute error on this fold.
                    errors[j] = np.mean(errorY)
                    j = j + 1
                cv_errors[i,:] = kernel_indices[kernel_vals.index(kern)], g, a, np.mean(errors)
                i = i + 1
    # Row with the smallest mean CV error wins.
    k_opt, g_opt, a_opt, _ = cv_errors[np.argmin(cv_errors[:, 3]), :]
    # Map the stored numeric kernel code back to its name.
    k_opt = kernel_vals[kernel_indices.index(k_opt)]
    regr = KernelRidge(alpha=a_opt, gamma=1.0/g_opt, kernel=k_opt)
    regr.fit(trainX, trainY)
    predY = regr.predict(testX)
    err_on_opt_params = np.absolute(predY - testY)
    return err_on_opt_params
def train_krrl_linear(self, data):
    """Train a linear-kernel KernelRidge model, report validation MAE,
    pickle the model, and return predictions on self.x_test.

    data: ((x_tr, y_tr), (x_val, y_val)); targets are in log1p space.
    """
    train, validacion = data
    x_tr, y_tr = train
    x_val, y_val = validacion
    print('Start training KernerRidge with linear kernel...')
    start_time = self.timer()
    krrl = KernelRidge(alpha=1)
    krrl.fit(x_tr, y_tr)
    print("The R2 is: {}".format(krrl.score(x_tr, y_tr)))
    self.timer(start_time)
    print("Making prediction on validation data")
    # Targets were trained in log1p space; invert before scoring.
    y_val = np.expm1(y_val)
    y_val_pred = np.expm1(krrl.predict(x_val))
    mae = mean_absolute_error(y_val, y_val_pred)
    print("El mean absolute error de es {}".format(mae))
    print('Saving model into a pickle')
    # Fix: create the directory idempotently instead of swallowing every
    # exception with a bare except.
    os.makedirs('pickles', exist_ok=True)
    with open('pickles/krrlLinearK.pkl', 'wb') as f:
        pickle.dump(krrl, f)
    print('Making prediction and saving into a csv')
    y_test = krrl.predict(self.x_test)
    return y_test
def RunKernel(XTrain, YTrain, XVal, YVal, XTest, YTest):
    """Fit a Laplacian-kernel ridge model with fixed hyperparameters and
    plot predicted vs. true excitation energies on the test set."""
    print("Optimizing Kernel Ridge Regression Parameters")
    #BestAlpha, BestGamma = DoGridSearch(XTrain, YTrain.ravel())
    BestAlpha = 0.01
    BestGamma = 0.001
    model = KernelRidge(kernel='laplacian', gamma=BestGamma, alpha=BestAlpha)
    model.fit(XTrain, YTrain.ravel())
    # Training-set mean absolute error.
    train_residuals = abs(model.predict(XTrain) - YTrain.ravel())
    print(sum(train_residuals) / float(len(train_residuals)))
    YPred = model.predict(XTest)
    # Test-set mean absolute error.
    test_residuals = abs(YPred - YTest.ravel())
    MAEPredicted = sum(test_residuals) / float(len(test_residuals))
    print(BestAlpha, BestGamma)
    print(MAEPredicted)
    plt.scatter(YTest.tolist(), YPred.tolist(), c='red', s=5)
    # Identity line for reference.
    plt.plot(np.linspace(0, 0.5, 2), np.linspace(0, 0.5, 2))
    plt.ylabel('Predicted Excitation Energy (a.u.)')
    plt.xlabel('True Excitation Energy (a.u.)')
    plt.title(
        'Kernel Ridge Regression (Laplacian) Learned Excitation Energies')
    plt.show()
    #RunKernel()
def choose_krr_kernel(train_x, test_x, train_y, test_y):
    """Tune the KernelRidge kernel on a held-out set and plot the scores."""
    kernels = ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid']
    kernel_scores = []
    best_k_score = 0.0
    best_k = ""
    for k in kernels:
        krr = KernelRidge(kernel=k)
        krr.fit(train_x, train_y)
        # Fix: removed a discarded krr.predict(test_x) call; score()
        # already predicts internally.
        score = krr.score(test_x, test_y)
        if score > best_k_score:
            best_k_score = score
            best_k = k
        kernel_scores.append(score)
    print(kernel_scores)
    print("Best kernel: " + str(best_k))
    print("Score received: " + str(best_k_score))
    plt.bar(kernels, kernel_scores)
    plt.xlabel('Kernel')
    plt.ylabel('Score')
    plt.xticks(np.arange(len(kernels)), kernels)
    plt.title('Tuning Kernel Hyperparameter for KRR')
    plt.show()
def kernel_ridge(trainData_x, trainData_y, testData_x, testData_y, COSMIC_num):
    """Fit a KernelRidge model (alpha=0.1) and report results on both splits."""
    # Kernel ridge regression
    model = KernelRidge(alpha=0.1).fit(trainData_x, trainData_y)
    test_predictions = model.predict(testData_x)
    train_predictions = model.predict(trainData_x)
    results(testData_y, test_predictions,
            trainData_y, train_predictions, "KernelRidge", COSMIC_num)
class KernelRidgeRegression(Oracle):
    """Oracle backed by sklearn KernelRidge with a CV-estimated noise std."""

    def __init__(self, kernel='rbf'):
        self.kernel = kernel
        self.model = KernelRidge(alpha=1, kernel=kernel, gamma=None, degree=5, coef0=1, kernel_params=None)

    def predict(self, X_nxp: np.array):
        # Returns (point predictions, per-point std); the std is the
        # constant oracle_std estimated during fit().
        return self.model.predict(X_nxp), self.oracle_std * np.ones((X_nxp.shape[0]))

    def fit(self, X_nxp: np.array, gt_n: np.array, weights_n: np.array = None, k_estimate_var: int = 4, epochs: int = None, seed: int = None, verbose: bool = False):
        # epochs/seed/verbose are accepted for interface compatibility with
        # other oracles but are unused here.
        if weights_n is None:
            weights_n = np.ones([gt_n.size])
        # ------ fit oracle variance -----
        # k-fold CV: the weighted mean squared validation error, averaged
        # over folds, serves as the oracle's noise variance.
        kf = KFold(n_splits=k_estimate_var, shuffle=True)
        kf.get_n_splits(X_nxp)
        oracle_var = 0.0
        for k, idx in enumerate(kf.split(X_nxp)):
            train_idx, val_idx = idx
            xtr_nx1, xval_nx1 = X_nxp[train_idx], X_nxp[val_idx]
            ytr_n, yval_n = gt_n[train_idx], gt_n[val_idx]
            wtr_n, wval_n = weights_n[train_idx], weights_n[val_idx]
            self.model.fit(xtr_nx1, ytr_n, sample_weight=wtr_n)
            oracle_var += np.mean(wval_n * np.square(self.model.predict(xval_nx1) - yval_n))
        oracle_var /= float(k_estimate_var)
        oracle_std = np.sqrt(oracle_var)
        self.oracle_std = oracle_std
        # Final fit on all of the data.
        self.model.fit(X_nxp, gt_n, sample_weight=weights_n)

    def get_parameters(self):
        return self.model, self.oracle_std

    def set_parameters(self, value):
        # value: (fitted model, oracle_std); the model is deep-copied.
        self.model = deepcopy(value[0])
        self.oracle_std = value[1]

    parameters = property(get_parameters, set_parameters)

    def get_initialization_kwargs(self):
        return {'kernel': self.kernel}

    def save(self, savepath: str):
        # Persistence is intentionally a no-op for this oracle.
        print("Not saving KernelRidgeRegression.")
def krr_base_model():
    """8-fold CV RBF KernelRidge baseline; writes a submission CSV and
    returns the fold-averaged test predictions."""
    maes = []
    rmses = []
    submission = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'),
                             index_col='seg_id')
    scaled_train_X = pd.read_csv('./result/scaled_train_X.csv')
    scaled_test_X = pd.read_csv('./result/scaled_test_X.csv')
    train_y = pd.read_csv('./result/train_y.csv')
    predictions = np.zeros(len(scaled_test_X))
    n_fold = 8
    folds = KFold(n_splits=n_fold, shuffle=True, random_state=42)
    fold_importance_df = pd.DataFrame()
    fold_importance_df["Feature"] = scaled_train_X.columns
    for fold_, (trn_idx, val_idx) in enumerate(
            folds.split(scaled_train_X, train_y.values)):
        print('working fold %d' % fold_)
        strLog = "fold {}".format(fold_)
        print(strLog)
        X_tr, X_val = scaled_train_X.iloc[trn_idx], scaled_train_X.iloc[val_idx]
        y_tr, y_val = train_y.iloc[trn_idx], train_y.iloc[val_idx]
        y_tr = y_tr['time_to_failure']
        y_val = y_val['time_to_failure']
        model = KernelRidge(kernel='rbf', alpha=0.001, gamma=0.001)
        model.fit(X_tr, y_tr)
        # Test predictions, averaged across folds.
        preds = model.predict(scaled_test_X)
        predictions += preds / folds.n_splits
        preds = model.predict(X_val)
        # mean absolute error
        mae = mean_absolute_error(y_val, preds)
        print('MAE: %.6f' % mae)
        maes.append(mae)
        # Fix: the value labelled RMSE was the plain MSE; take the square
        # root so the reported metric matches its name.
        rmse = np.sqrt(mean_squared_error(y_val, preds))
        print('RMSE: %.6f' % rmse)
        rmses.append(rmse)
    print('MAEs', maes)
    print('MAE mean: %.6f' % np.mean(maes))
    print('RMSEs', rmses)
    print('RMSE mean: %.6f' % np.mean(rmses))
    submission['time_to_failure'] = predictions
    submission.to_csv('submission_krr_8.csv')
    return predictions
class VADEstimator(BaseEstimator):
    """Estimator that fits an RBF KernelRidge model to (x, y).

    Fix: the original fit() built and compiled a Keras Sequential network
    and then immediately overwrote it with KernelRidge; the dead network
    construction has been removed (behavior is unchanged).
    """

    def fit(self, x, y, size=1):
        # size is kept for interface compatibility; KernelRidge infers the
        # output dimensionality from y.
        self.model = KernelRidge(kernel='rbf')
        self.model.fit(x, y)

    def predict(self, x):
        # The Sequential branch is retained for compatibility with models
        # assigned externally (e.g. an unpickled legacy Keras model).
        if isinstance(self.model, Sequential):
            return self.model.predict(x, verbose=0)[0]
        return self.model.predict(x)
def krr_linear(i, train, test, M, alpha, limit=None):
    """Fit a KernelRidge model and return (train SSE, test SSE)."""
    # Build the design matrices; `limit` optionally caps the sample count.
    if limit:
        (Xtrain, Ytrain), (Xtest, Ytest) = train_test_build(i, train, test, M, limit)
    else:
        (Xtrain, Ytrain), (Xtest, Ytest) = train_test_build(i, train, test, M)
    model = KernelRidge(alpha=alpha)
    model.fit(Xtrain, Ytrain)
    test_sse = sum((model.predict(Xtest) - Ytest) ** 2)
    train_sse = sum((model.predict(Xtrain) - Ytrain) ** 2)
    return (train_sse, test_sse)
def kernel_ridge(X, X_test, y, params):
    """5-fold CV KernelRidge regression.

    Returns (test predictions, per-fold R^2 scores, out-of-fold
    predictions on the training data).
    """
    alpha = params.get("alpha", 1)
    gamma = params.get("gamma", 0.1)
    kernel = params.get("kernel", "rbf")
    kf = KFold(n_splits=5, shuffle=True)
    scores = []
    # Fix: np.array(len(y)) created a 0-d array that cannot be
    # index-assigned; allocate a proper out-of-fold buffer instead.
    pred = np.zeros(len(y))
    for train, test in kf.split(X, y):
        regr = KernelRidge(alpha=alpha, kernel=kernel, gamma=gamma)
        regr.fit(X[train], y[train])
        pred[test] = regr.predict(X[test])
        # Fix: r2_score expects (y_true, y_pred) in that order.
        scores.append(sklearn.metrics.r2_score(y[test], pred[test]))
    # Refit on all the data before predicting the held-out test set.
    regr.fit(X, y)
    return regr.predict(X_test), scores, pred
def ridgeRegression(ATrain, performanceTrain, distortionTrain, ATest, performanceTest, distortionTest):
    # Fit sigmoid-kernel ridge models for the performance and distortion
    # targets, print test-set mean absolute errors, and plot histograms.
    model = KernelRidge(alpha=0.01, kernel='sigmoid')
    model.fit(ATrain, performanceTrain)
    performancePred = model.predict(ATest)
    # Mean absolute error of the performance predictions.
    performanceErr = sum(abs(performancePred - performanceTest)) / len(performanceTest)
    print 'Kernel ridge performance error: ', performanceErr
    # The same estimator object is re-fit for the distortion target.
    model.fit(ATrain, distortionTrain)
    distortionPred = model.predict(ATest)
    distortionErr = sum(abs(distortionPred - distortionTest)) / len(distortionTest)
    print 'Kernel ridge distortion error: ', distortionErr
    histoPlot(performancePred, performanceTest)
    histoPlot(distortionPred, distortionTest)
class KernelRidgeImpl():
    """Thin fit/predict facade over the wrapped sklearn estimator."""

    def __init__(self, alpha=1, kernel='linear', gamma=None, degree=3,
                 coef0=1, kernel_params=None):
        # Record the hyperparameters, then build the wrapped estimator.
        self._hyperparams = {
            'alpha': alpha,
            'kernel': kernel,
            'gamma': gamma,
            'degree': degree,
            'coef0': coef0,
            'kernel_params': kernel_params,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; y is forwarded only when provided."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
def kernel_ridge_pre(X_train, y_train, X_pre, val):
    """Fit KernelRidge with hyperparameters from `val` and predict X_pre."""
    estimator = KernelRidge(kernel=val['kernel'], alpha=val['alpha'],
                            gamma=val['gamma'])
    estimator.fit(X_train, y_train)
    return estimator.predict(X_pre)
def mainregress(selection, alpha):
    # Callback: fit a quadratic KernelRidge model (features [x, x^2]) to
    # the points selected in the scatter widget, then redraw the plot and
    # the data table.
    if len(selection) < 2:
        return  # need at least two selected points to fit
    x = xdown.get()['value']  # currently selected x-axis column name
    y = ydown.get()['value']  # currently selected y-axis column name
    tabdata = []
    mldatax = []
    mldatay = []
    species = iris.Species.unique()
    for i, p in enumerate(selection['points']):
        mldatax.append(p['x'])
        mldatay.append(p['y'])
        # 'curve' indexes the plotted trace, which maps onto the species.
        tabdata.append({
            x: p['x'],
            y: p['y'],
            'species': species[p['curve']]
        })
    # Design matrix with linear and quadratic terms.
    X = np.c_[mldatax, np.array(mldatax) ** 2]
    ridge = KernelRidge(alpha=alpha).fit(X, mldatay)
    # Dense grid (padded by 1 on each side) for drawing the model curve.
    xspace = np.linspace(min(mldatax)-1, max(mldatax)+1, 100)
    plot = pw.scatter(mldatax, mldatay, label='train', markersize=15)
    for i, df in iris.groupby('Species'):
        plot += pw.scatter(df[x], df[y], label=i)
    plot += pw.line(xspace, ridge.predict(np.c_[xspace, xspace**2]), label='model', mode='lines')
    plot.xlabel = x
    plot.ylabel = y
    linear.do_all(plot.dict)
    table1.do_data(pd.DataFrame(tabdata))
def ridge_regression(K1, K2, y1, y2, alpha, c):
    """Precomputed-kernel ridge classification accuracy.

    K1: train-train kernel; K2: val-train kernel; y1/y2: integer labels;
    c: number of classes. Returns the fraction correctly classified.
    """
    n_val, n_train = K2.shape
    model = KernelRidge(kernel="precomputed", alpha=alpha)
    # Centered one-hot targets: 1 - 1/c for the true class, -1/c otherwise.
    targets = np.eye(c)[y1] - 1.0 / c
    model.fit(K1, targets)
    predicted = model.predict(K2).argmax(axis=1)
    return 1.0 * np.sum(predicted == y2) / n_val
def get_SVM_NTK(self, for_test: bool):
    # For each task, solve a kernel regression/SVR problem in the NTK
    # (neural tangent kernel) space and collect query-set predictions.
    # for_test selects the test tasks; otherwise the training tasks.
    if self.params['kernel_ridge']:
        clf = KernelRidge(alpha=self.params['ridge_coef'][0], kernel="precomputed")
    else:
        clf = SVR(kernel="precomputed", C=self.params['svm_coef'][0], epsilon=self.params['svm_coef'][1], cache_size=100000)
    output = []
    train = not for_test
    Ys_ = self.test_Ys_ if for_test else self.Ys_
    N = self.N_test if for_test else self.N_train
    for idx in range(N):
        # Support-support kernel (fit) and query-support kernel (predict).
        NTK_train = self.get_ntk(fst_train=train, fst_idx=idx, fst_qry=False, snd_train=train, snd_idx=idx, snd_qry=False, ridge=True)
        NTK_test = self.get_ntk(fst_train=train, fst_idx=idx, fst_qry=True, snd_train=train, snd_idx=idx, snd_qry=False, ridge=False)
        y = Ys_[idx]
        # Labels are pre-multiplied by the finite-time NTK training
        # evolution operator before fitting.
        time_evolution = self.time_evolution(NTK_train, self.params['inner_lr'])
        clf.fit(X=NTK_train, y=time_evolution @ y)
        pred = clf.predict(X=NTK_test)
        output.append(pred)
    # Concatenated predictions over all tasks.
    return np.concatenate(output)
def run(self, ind_sampling, ind_fold):
    """Train and score one CV fold for every hyperparameter in
    self.list_param, then pickle the per-parameter prediction scores."""
    if self.fold_setting == "S4":
        nb_fold = self.nb_fold * self.nb_fold
    self.load_CV_indexes(ind_sampling)
    # File-name fragment depends on the CV flavour.
    if self.CV_type == 'ClusterCV_':
        ajout = self.CV_type
    else:
        ajout = 'CV_'
    K_train, K_test = self.make_Ktrain_and_Ktest_MT_with_settings(
        self.samples_tr[ind_fold], self.samples_te[ind_fold])
    pred_score = []
    for param in range(len(self.list_param)):
        if self.type_clf == "SVM":
            clf = svm.SVC(kernel='precomputed', C=self.list_param[param])
            clf.fit(K_train, self.labels_tr[ind_fold])
            Y_test_score = clf.decision_function(K_test).tolist()
        elif self.type_clf == "KernelRidge":
            clf = KernelRidge(alpha=self.list_param[param], kernel='precomputed')
            # Fix: 'inner_labels_tr' was an undefined name (NameError at
            # runtime); use the same fold labels as the SVM branch.
            clf.fit(K_train, self.labels_tr[ind_fold])
            Y_test_score = clf.predict(K_test).tolist()
        else:
            raise ValueError('invalid value of type_clf')
        pred_score.append(Y_test_score)
        del clf
        del Y_test_score
    pickle.dump(pred_score, open('saved_results/MT/MT_'+str(self.nb_fold)+'fold'+ajout+self.fold_setting+"_"+self.type_clf+"_"+str(ind_fold)+"_"+str(ind_sampling)+".data", 'wb'))
    del K_train
    del K_test
def choose_alpha_ridge(X, y, range_C, gammaX, plot_color):
    '''Implement 5 fold cv to determine optimal gamma'''
    kf = KFold(n_splits=5)
    mean_error = []
    std_error = []
    for C in range_C:
        fold_mses = []
        # alpha = 1/(2C) maps the SVM-style C parameter onto ridge alpha.
        model = KernelRidge(alpha=1.0 / (2 * C), kernel='rbf', gamma=gammaX)
        for train, test in kf.split(X):
            model.fit(X[train], y[train])
            ypred = model.predict(X[test])
            fold_mses.append(mean_squared_error(y[test], ypred))
        # Mean and spread of the fold MSEs for this C.
        mean_error.append(np.array(fold_mses).mean())
        std_error.append(np.array(fold_mses).std())
    fig = plt.figure(figsize=(15, 12))
    plt.errorbar(range_C, mean_error, yerr=std_error, color=plot_color)
    plt.xlabel('C')
    plt.ylabel('Mean square error')
    plt.title('Choice of C in kernelised Ridge Regression - 5 fold CV, gamma = {}'.format(gammaX))
    plt.show()
def local_bias_estimator(X, Y, p, X_grid, model='KRR', kernel_function='rbf', **kwargs):
    """Estimate the local bias Y - p of a predictor on X_grid.

    model: 'KRR' or 'SVR' fit a regressor to the residuals; 'EWF' uses a
    kernel-weighted (Nadaraya-Watson style) average instead.
    """
    check_attributes(X, Y)
    if model == 'KRR':
        from sklearn.kernel_ridge import KernelRidge
        regressor = KernelRidge(kernel=kernel_function, **kwargs)
    elif model == 'SVR':
        from sklearn.svm import SVR
        regressor = SVR(kernel=kernel_function, **kwargs)
    elif model == 'EWF':
        # Kernel-weighted average of the residuals — no model fitting.
        K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs)
        p_err = Y - p
        return np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1)
    else:
        raise ValueError("Model %s is not defined." % model)
    regressor.fit(X, Y - p)
    return regressor.predict(X_grid)
def get_reconstruction_error(ct, data, nsplits=4, clf='kridge'):
    # Cross-validated reconstruction: predict *all* variables from only the
    # variables of the tasks indexed by `ct`, and return the correlation
    # between the scaled data and its out-of-fold prediction.
    # data: DataFrame whose columns are named "<task>.<variable>".
    tasknames = [i.split('.')[0] for i in data.columns]
    tasks = list(set(tasknames))
    tasks.sort()
    chosen_vars = []
    #print(ct,tasks,tasknames)
    for i in ct:
        # Column indices whose task prefix matches the selected task.
        vars = [
            j for j in range(len(tasknames))
            if tasknames[j].split('.')[0] == tasks[i]
        ]
        chosen_vars += vars
    kf = KFold(n_splits=nsplits, shuffle=True)
    fulldata = data.values
    #subdata=data.ix[:,chosen_vars].values
    # Regressor choice: kernel ridge (default), random forest, or linear.
    if clf == 'kridge':
        linreg = KernelRidge(alpha=1)
    elif clf == 'rf':
        linreg = RandomForestRegressor()
    else:
        linreg = LinearRegression()
    scaler = StandardScaler()
    pred = numpy.zeros(fulldata.shape)
    for train, test in kf.split(fulldata):
        #fulldata_train=fulldata[train,:]
        #fulldata_test=fulldata[test,:]
        # fit scaler to train data and apply to test
        fulldata_train = scaler.fit_transform(fulldata[train, :])
        fulldata_test = scaler.transform(fulldata[test, :])
        subdata_train = fulldata_train[:, chosen_vars]
        subdata_test = fulldata_test[:, chosen_vars]
        linreg.fit(subdata_train, fulldata_train)
        pred[test, :] = linreg.predict(subdata_test)
    # NOTE(review): scaler here retains the fit from the *last* fold's
    # training data — confirm this is intended before relying on cc.
    cc = numpy.corrcoef(scaler.transform(fulldata).ravel(), pred.ravel())[0, 1]
    return cc
def kernel_ridge(X_train, y_train, X_test, y_test, val):
    """Fit KernelRidge with parameters from `val`; return (r2, mse)."""
    estimator = KernelRidge(kernel=val['kernel'], alpha=val['alpha'],
                            gamma=val['gamma'])
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    # show_metrics prints and returns the scores for this model name.
    r2, mse = show_metrics('Kernel Ridge', y_test, predictions)
    return r2, mse
def test_kernel_ridge_singular_kernel():
    # alpha=0 causes a LinAlgError in computing the dual coefficients,
    # which causes a fallback to a lstsq solver. This is tested here.
    expected = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X)
    krr = KernelRidge(kernel="linear", alpha=0)
    ignore_warnings(krr.fit)(X, y)
    assert_array_almost_equal(expected, krr.predict(X))
def outofsample_extensions(method='kernel-regression'):
    # Compare out-of-sample extension strategies for an Isomap embedding of
    # MNIST: a kernel-ridge baseline vs. kernel SEF projections.
    # Prints the SVM test accuracy obtained in the extended space.
    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist(dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000
    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data[:n_train_samples, :]))
    # Bandwidth heuristic: mean pairwise distance of the training data.
    sigma = mean_data_distance(np.float32(train_data[:n_train_samples, :]))
    if method == 'kernel-regression':
        # Use kernel regression to provide baseline out-of-sample extensions
        proj = KernelRidge(kernel='rbf', gamma=(1.0 / sigma**2))
        proj.fit(np.float64(train_data[:n_train_samples, :]), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data[:n_train_samples, :]), train_labels[:n_train_samples], proj.predict(test_data), test_labels)
    elif method == 'cK-ISOMAP-10d' or method == 'cK-ISOMAP-20d':
        # Use the SEF to provide out-of-sample extensions
        if method == 'cK-ISOMAP-10d':
            dims = 10
        else:
            dims = 20
        proj = KernelSEF(train_data[:n_train_samples], train_data.shape[1], output_dimensionality=dims)
        proj.cuda()
        # Train the SEF projection to mimic ('copy') the Isomap embedding.
        loss = proj.fit(data=train_data[:n_train_samples, :], target_data=train_data_isomap, target='copy', epochs=100, batch_size=128, verbose=True, learning_rate=0.00001, regularizer_weight=0.001)
        acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]), train_labels[:n_train_samples], proj.transform(test_data), test_labels)
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def AlgoKRR(df_train, df_trainY):
    """Fit a degree-2 polynomial KernelRidge model, report CV and training
    RMS, and return the fitted model."""
    model = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
    rmsle_cv(model, df_train, df_trainY)
    model.fit(df_train, df_trainY)
    fitted = model.predict(df_train)
    # RMS on the training set itself (optimistic estimate).
    print("rms value of same set: ",
          np.around(sqrt(mean_squared_error(df_trainY, fitted)), decimals=7))
    return model
def uniform(self, method=None):
    # Combine the test kernels with uniform weights mu and raise the
    # weighted sum to self.degree.
    tmp = self.make_test_kernels(self.x, self.x, subsampling=self.subsampling)
    mu = np.ones(self.n_features) / self.n_features
    g = self.sum_weight_kernels(tmp, mu)**self.degree
    # NOTE(review): KernelRidge.predict is called unbound on the class with
    # self.gTest as the instance argument — this cannot work as written and
    # presumably should be a fitted model's predict(g); confirm intent.
    yPredictR = KernelRidge.predict(self.gTest)
    # NOTE(review): reads self.yPredictR (attribute) rather than the local
    # yPredictR computed above, and the +/-1 class labels are discarded
    # (no return) — verify against callers.
    yPredictC = 2 * (self.yPredictR >= 0.) - 1
def train_test_model(vol_df, short_term_days, features, model, train_date, test_date):
    # Volatility forecasting: lag the features by short_term_days, select
    # features with Lasso, fit the requested regressor, and return
    # (train, test, forward-prediction) DataFrames of predictions.
    # Feature rows used to predict the window beyond the sample end.
    predX = vol_df.ix[-short_term_days - 1:, features]
    # Build the forward trading-day index via the WindPy calendar API.
    pred_end_date = w.tdaysoffset(short_term_days, predX.index[-1]).Data[0][0]
    pred_dates = w.tdays(predX.index[-1].strftime("%Y-%m-%d"), pred_end_date.strftime("%Y-%m-%d")).Times
    predX.index = pred_dates
    # Lag features so today's target is predicted from past features.
    vol_df[features] = vol_df[features].shift(short_term_days)
    vol_df.dropna(inplace=True)
    train_df = vol_df[vol_df.index <= test_date]
    test_df = vol_df[vol_df.index >= test_date]
    train_df = train_df.append(test_df.ix[0])  # so the plot looks continuous
    trainX, trainY = train_df[features], train_df['vol']
    testX, testY = test_df[features], test_df['vol']
    # Use Lasso for feature selection.
    lasso = linear_model.Lasso(alpha=0.0005)
    lasso.fit(trainX, trainY)
    sfm = SelectFromModel(lasso, prefit=True)
    fea_trainX = sfm.transform(trainX)
    fea_testX = sfm.transform(testX)
    fea_predX = sfm.transform(predX)
    # Dispatch on the requested model name.
    if model == "LinearRegression":
        reg_model = linear_model.LinearRegression()
    elif model == "KernelRidgeRegression":
        reg_model = KernelRidge(kernel='rbf')
    elif model == "SupportVectorRegression":
        reg_model = SVR(kernel="linear")
    elif model == "Ridge":
        reg_model = linear_model.Ridge()
    elif model == "RandomForestRegression":
        reg_model = RandomForestRegressor()
    elif model == "AdaBoostRegression":
        reg_model = AdaBoostRegressor()
    else:
        # NOTE(review): returns (not raises) NotImplementedError — confirm.
        return NotImplementedError
    reg_model.fit(fea_trainX, trainY)
    train_pred = reg_model.predict(fea_trainX)
    test_pred = reg_model.predict(fea_testX)
    pred_pred = reg_model.predict(fea_predX)
    return (pd.DataFrame({'pred': train_pred}, index=trainX.index),
            pd.DataFrame({'pred': test_pred}, index=testX.index),
            pd.DataFrame({'pred': pred_pred}, index=predX.index))
def lgo_sklearn(X, y, groups, regparam):
    """Leave-one-group-out CV error of an RBF KernelRidge model."""
    splitter = LeaveOneGroupOut()
    fold_errors = []
    for train, test in splitter.split(X, y, groups=groups):
        model = KernelRidge(kernel="rbf", gamma=0.01)
        model.fit(X[train], y[train])
        fold_errors.append(sqerror(y[test], model.predict(X[test])))
    return np.mean(fold_errors)
def lpo_sklearn(X, y, regparam):
    """Leave-pair-out CV; returns the two per-pair prediction lists."""
    splitter = LeavePOut(p=2)
    first_preds = []
    second_preds = []
    for train, test in splitter.split(X):
        model = KernelRidge(kernel="rbf", gamma=0.01)
        model.fit(X[train], y[train])
        pair = model.predict(X[test])
        first_preds.append(pair[0])
        second_preds.append(pair[1])
    return first_preds, second_preds
def ANM_causation_score(self,train_size=0.5,independence_criterion='HSIC',metric='linear',regression_method='GP'):
    '''
    Measure how likely a given causal direction is true

    Parameters
    ----------
    train_size : Fraction of given data used to training phase
    independence_criterion :
        kruskal for Kruskal-Wallis H-test,
        HSIC for Hilbert-Schmidt Independence Criterion
    metric : linear, sigmoid, rbf, poly
        kernel function to compute gramm matrix for HSIC
        gaussian kernel is used in :
        Nonlinear causal discovery with additive noise models
        Patrik O. Hoyer et. al

    Returns
    -------
    causal_strength: A float between 0. and 1.
    '''
    # Disjoint splits: regression on the train part, independence test on
    # the held-out part.
    Xtrain, Xtest , Ytrain, Ytest = train_test_split(self.X, self.Y, train_size = train_size)
    if regression_method == 'GP':
        _gp = pyGPs.GPR()  # specify model (GP regression)
        _gp.getPosterior(Xtrain, Ytrain)  # fit default model (mean zero & rbf kernel) with data
        _gp.optimize(Xtrain, Ytrain)  # optimize hyperparamters (default optimizer: single run minimize)
        #Forward case
        #_gp = KernelRidge(kernel='sigmoid',degree=3)
        #_gp.fit(Xtrain,Ytrain)
        ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
        #_gp.plot()
        #errors_forward = _gp.predict(Xtest) - Ytest
        # Residuals of the forward model X -> Y on the held-out data.
        errors_forward = ym - Ytest
    else:
        # Fallback regressor: sigmoid-kernel ridge regression.
        _gp = KernelRidge(kernel='sigmoid')
        _gp.fit(Xtrain, Ytrain)
        errors_forward = _gp.predict(Xtest) - Ytest
    #Independence score
    # p-value of independence between the residuals and the cause: higher
    # means the additive-noise model in this direction is more plausible.
    forward_indep_pval = {
        'kruskal': kruskal(errors_forward,Xtest)[1],
        'HSIC': self.HilbertSchmidtNormIC(errors_forward,Xtest,metric=metric)[1]
    }[independence_criterion]
    return {'causal_strength':forward_indep_pval}
def xyz_kde(xyz, gamma, N_grid=100):
    """Smooth scattered z(x, y) samples onto an N_grid x N_grid mesh with
    RBF kernel ridge regression.

    xyz: array whose last column is z and preceding columns are (x, y).
    Returns (H, x_grid, y_grid, gamma) where H is the predicted surface.
    """
    xy = xyz[:, :-1]
    z = xyz[:, -1]
    x_edges = np.linspace(np.min(xy[:, 0]), np.max(xy[:, 0]), N_grid + 1)
    y_edges = np.linspace(np.min(xy[:, 1]), np.max(xy[:, 1]), N_grid + 1)
    # Bin centres are midpoints of consecutive edges (vectorized; replaces
    # the per-bin Python list comprehensions).
    x_centres = (x_edges[:-1] + x_edges[1:]) / 2
    y_centres = (y_edges[:-1] + y_edges[1:]) / 2
    x_grid, y_grid = np.meshgrid(x_centres, y_centres)
    xy_grid = np.array([np.ravel(x_grid), np.ravel(y_grid)]).T
    clf = KernelRidge(kernel='rbf', gamma=gamma).fit(xy, z)
    H = clf.predict(xy_grid).reshape(N_grid, N_grid)
    return H, x_grid, y_grid, gamma
def plot_kernel_ridge(X, y, gamma=0.5, alpha=0.1):
    """Fit an RBF kernel ridge model to (X, y) and plot data plus fit."""
    # kernel (ridge) regression
    krr = KernelRidge(kernel="rbf", gamma=gamma, alpha=alpha)
    krr.fit(X, y)
    # Evaluate the fit on a dense grid spanning the data range.
    x_plot = np.linspace(min(X), max(X), 100)[:, np.newaxis]
    y_plot = krr.predict(x_plot)
    # plot
    plt.figure(figsize=(8, 4.8))
    plt.plot(X, y, 'or')
    plt.plot(x_plot, y_plot)
    # plt.title(r"Gaussian Kernel ($\gamma=%0.2f, \alpha=%0.2f$)" % (gamma,alpha), fontsize=16)
    plt.title(r"Gaussian Kernel ($\gamma=%0.2f$)" % (gamma), fontsize=16)
def modelfitOne(train_X, train_y, test_X, yd, ImageId, FeatureName):
    """Fit one RBF KernelRidge model over all keypoint targets and
    assemble per-(image, feature) predictions rescaled to pixel space."""
    n_clf = 1
    # Single regressor over every keypoint column.
    clf = KernelRidge(kernel='rbf', gamma=6e-4, alpha=2e-2)
    print('-----------------开始训练...------------------')
    clf.fit(train_X, train_y)
    print('-----------------开始预测...------------------')
    pred = clf.predict(test_X)
    predicted = np.zeros(len(FeatureName))
    for row in range(len(FeatureName)):
        # Progress heartbeat every 500 rows.
        if row % 500 == 0:
            print('i =', row)
        predicted[row] = pred[ImageId[row], yd[FeatureName[row]]]
    # Undo the [-1, 1] normalisation back to the 96-pixel coordinate range.
    return predicted * 48. + 48.
class Learner():
    # Learns a state -> action mapping with kernel ridge regression and,
    # optionally, trains SHIV (AHQP) one-class models that flag states
    # where the learned policy should ask a supervisor for help.

    # On-disk locations of the recorded training data.
    path = 'matrices/'
    inputF = 'inputs.npy'
    stateF = 'states.npy'
    itrF = 'itr.npy'
    inptFile = os.path.join(path, inputF)
    stateFile = os.path.join(path, stateF)
    itrFile = os.path.join(path, itrF)
    itr = np.array([])  # running record of training-set sizes per iteration
    useSHIV = False
    THRESH = 0.45
    # One-class AHQP solvers for "good" and "bad" states respectively.
    ahqp_solver_g = AHQP(sigma=6)
    ahqp_solver_b = AHQP(sigma=5,nu=1e-3)

    def trainModel(self, s=None, a=None):
        """ Trains model on given states and actions. Uses neural net or SVM based on global settings. """
        # The first three recorded samples are skipped (seed/warm-up data).
        states, actions = self.states[3:], self.actions[3:]
        #print "states.shape"
        #print states.shape
        #print "actions.shape"
        #print actions.shape
        # Record the current training-set size for later inspection.
        if len(self.itr) == 0:
            self.itr = np.array([states.shape[0]])
        else:
            self.itr = np.hstack((self.itr, states.shape[0]))
        '''if states.shape[0] > 2700.0: f = os.path.join(self.path, 'statesToValidate.npy') np.save(f, states) IPython.embed()'''
        fits = []
        #actions = actions.ravel()
        self.clf = KernelRidge(alpha=1.0)
        self.clf.kernel = 'rbf'
        print "SIZE: ", states.shape
        self.clf.fit(states, actions)
        #IPython.embed()
        # Per-state residual norm of the fitted regressor.
        actions_pred = self.clf.predict(states)
        bad_state = np.zeros(actions_pred.shape[0])
        for i in range(actions_pred.shape[0]):
            fit = LA.norm(actions_pred[i,:] - actions[i,:])
            fits.append(fit)
        med = np.median(np.array(fits))
        # States with above-median residual are flagged as "bad".
        for fit in fits:
            if(fit>med):
                # NOTE(review): 'i' here is the stale index from the
                # previous loop, so only bad_state[i] for the last i is
                # ever set — presumably this should enumerate fits; verify.
                bad_state[i] = 1
        IPython.embed()
        if self.useSHIV:
            # Train the good/bad one-class models on standardized states.
            self.labels = np.zeros(states.shape[0])+1.0
            self.scaler = preprocessing.StandardScaler().fit(states)
            states_proc = self.scaler.transform(states)
            good_labels = bad_state == 0.0
            states_g = states_proc[good_labels,:]
            bad_labels = bad_state == 1.0
            states_b = states_proc[bad_labels,:]
            #IPython.embed()
            self.ahqp_solver_g.assembleKernel(states_g, np.zeros(states_g.shape[0])+1.0)
            self.ahqp_solver_b.assembleKernel(states_b, np.zeros(states_b.shape[0])+1.0)
            #IPython.embed()
            self.ahqp_solver_g.solveQP()
            self.ahqp_solver_b.solveQP()
        #score = self.clf.score(states, actions)
        #print score
        self.plot(fits, states, med)

    def askForHelp(self,state):
        # Returns -1.0 when the bad-state model claims the state, the
        # good-state model's prediction otherwise; -1 when SHIV is off.
        if self.useSHIV:
            state = self.scaler.transform(state)
            if self.ahqp_solver_b.predict(state)==1.0:
                return -1.0
            else:
                return self.ahqp_solver_g.predict(state)
        else:
            return -1

    def plot(self, fits, states, threshold):
        # Plot per-state fit error against the median-threshold line.
        index = range(len(states))
        t = np.ones(len(index)) * threshold
        plt.figure(1)
        plt.plot(index, fits, color='b', linewidth=4.0)
        plt.plot(index, t, color='r', linewidth=4.0)
        plt.ylabel('Fit')
        plt.xlabel('Index of State')
        plt.show()

    def getAction(self, state):
        """ Returns a prediction given the input state. Uses neural net or SVM based on global settings. """
        return self.clf.predict(state)

    def initModel(self, useSHIV):
        # Load recorded states/actions from disk, falling back to a single
        # hard-coded seed sample when the files do not exist yet.
        self.useSHIV = useSHIV
        try:
            self.states = np.load(self.stateFile)
            self.actions = np.load(self.inptFile)
        except IOError:
            self.states = np.array([-8,8.75,0,-12,22,0,-15,21.13043404, 0,-12,18.52173996,0,-15,14.173913, 0,-12,8.08695698,0,0,0,0,0])
            self.actions = np.array([0,0,0,0])
        #self.trainModel(self.states, self.actions)

    def updateModel(self, s, a):
        # Append a new (state, action) sample; retraining is deferred.
        self.states = np.vstack((self.states, s))
        self.actions = np.vstack((self.actions,a))
        #self.trainModel(self.states, self.actions)

    def saveModel(self):
        # Snapshot the data under a timestamped name plus the itr history.
        path = 'matrices/oldData/'
        currT = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        inptFileOut = os.path.join(path, 'inputs' + currT + '.npy')
        stateFileOut = os.path.join(path, 'states' + currT + '.npy')
        np.save(stateFileOut, self.states)
        np.save(inptFileOut, self.actions)
        np.save(self.itrFile, self.itr)
# Demo: fit a noisy periodic-RBF kernel-ridge model to samples of fun()
# and plot predictions, smoothed values and gradients.
# NOTE(review): this KernelRidge is a custom extension (rbf_periodic
# kernel, fit_w_noise, predict(..., MSE=True), predict_gradient), NOT
# sklearn's; fun() and the imports (sp, plt) are defined elsewhere.
def dfun(x):
    # Analytic derivative of the target; presumably used for comparing
    # against predict_gradient — confirm against the full script.
    return 2*sp.sin(x)*sp.cos(x) + sp.exp(-sp.cos(x)) * sp.sin(x)

lengthscale = 1.
gamma = 1 / (2 * lengthscale**2)
# Hyper-parameter bounds gammaL/gammaU bracket the initial gamma by 10x
# in each direction; max_lhood=False disables likelihood maximization.
krr = KernelRidge(kernel='rbf_periodic', gamma=gamma, alpha=1.0e-1, gammaL=0.1*gamma, gammaU=10*gamma, max_lhood=False)
# 210 sample locations drawn from [-1, 11), sorted for plotting.
X = 12*sp.random.random_sample(210) - 1
X.sort()
y = fun(X) + sp.random.normal(scale=0.1, size=len(X))
X = sp.atleast_2d(X).T
krr.fit_w_noise(X, y)
Xtest = sp.atleast_2d(sp.linspace(X.min(), X.max(), 200)).T
y_pred, MSE = krr.predict(Xtest, MSE=True)
y_smooth = krr.predict(X).ravel()
yprime_ = krr.predict_gradient(Xtest).ravel()
print("noise = %.3e, lengthscale = %.3e" % (krr.noise.mean(), 1/(2 * krr.gamma)**0.5))
plt.clf()
plt.close()
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(24, 16))
ax0.plot(Xtest, fun(Xtest), 'g--')
ax0.scatter(X, y, c='r', marker = '+', alpha = .5)
ax0.scatter(Xtest, y_pred, c='b', marker = 'o', alpha = .5)
# ax0.fill(sp.concatenate([X, X[::-1]]),
#          sp.concatenate([y_smooth - 1.9600 * krr.noise,
# Text-regression pipeline: tokenize, build padded sequences and
# bag-of-words matrices, load pre-trained word embeddings, then compare
# a linear kernel ridge baseline against an MLP.
# NOTE(review): tokenizer, train_texts/test_texts, max_sent_len,
# max_features, embeddings_dim, embeddings, train_labels/test_labels,
# is_geocoding and geodistance are defined earlier in the file.
tokenizer.fit_on_texts(train_texts)
train_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( train_texts ) , maxlen=max_sent_len )
test_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( test_texts ) , maxlen=max_sent_len )
train_matrix = tokenizer.texts_to_matrix( train_texts )
test_matrix = tokenizer.texts_to_matrix( test_texts )
# Embedding matrix: row index = token id, filled from the pre-trained
# vectors; words missing from the embedding table get a random vector.
embedding_weights = np.zeros( ( max_features , embeddings_dim ) )
for word,index in tokenizer.word_index.items():
    if index < max_features:
        try:
            embedding_weights[index,:] = embeddings[word]
        except:
            embedding_weights[index,:] = np.random.rand( 1 , embeddings_dim )
print ("")
print ("Method = Linear ridge regression with bag-of-words features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix , train_labels )
results = model.predict( test_matrix )
if not(is_geocoding):
    print ("RMSE = " + repr( np.sqrt(mean_squared_error( test_labels , results )) ) )
    print ("MAE = " + repr( mean_absolute_error( test_labels , results ) ) )
else:
    # Geocoding targets: report mean/median geodistance per prediction.
    print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )
    print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )
print ("")
print ("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
#from sklearn.svm import SVR from sklearn.kernel_ridge import KernelRidge import numpy as np n_samples, n_features = 10, 5 np.random.seed(0) y = np.random.randn(n_samples) print y print X = np.random.randn(n_samples, n_features) print X #clf = SVR(C=1.0, epsilon=0.2) clf = KernelRidge(alpha=1.0) clf.fit(X, y) print y[1] print clf.predict(X[1])
# Incremental-folders evaluation: train on the first i folders, report
# overall RMSE on everything held out, then plot RMSE against the
# number of training folders.
# NOTE(review): X, Y, index, model, rmse_list, mean_squared_error and
# plt are defined earlier in the file.
Overall_Y_Pred = np.zeros(len(X))
for i in [t+1 for t in list(range(4))]:
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    #index_train starts with the first folder
    index_train = index[folder_train];
    # Fix: use a distinct comprehension variable (k) instead of
    # shadowing the outer loop index i (matches the sibling snippet in
    # this file; under Python 2 the comprehension would clobber i).
    index_test = [element for k, element in enumerate(index) if k not in to_exclude]
    print (len(index_test))
    #train set starts with the first folder
    X_train = X[np.hstack(index_train)]
    Y_train = Y[np.hstack(index_train)]
    X_test = X[np.hstack(index_test)]
    Y_test = Y[np.hstack(index_test)]
    # train on training sets
    model.fit(X_train, Y_train)
    Y_test_Pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(Y_test, Y_test_Pred))
    rmse_list.append(rmse)
print (rmse_list)
#Plot:
y = np.asarray(rmse_list)
x = np.asarray([t+1 for t in list(range(4))])
plt.plot(x, y, x, y, 'rs')
plt.title('Number of Folders in Training Set vs. rmse of Test Set')
plt.xlabel('Number of Folders in Training Set')
plt.ylabel('Overall RMSE of Test Set')
plt.grid(True)
plt.show()
##################################################################### # For each parameter trial for i in xrange(trials): # For regression use the Kernel Ridge method if model_type == "regression": print "\n Starting experiment for trial %d and parameter alpha = %3f\n " % (i, alpha_grid[i]) # Fit the kernel ridge model KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i]) KR.fit(K_train, y_train) # predict on the validation and test set y_pred = KR.predict(K_val) y_pred_test = KR.predict(K_test) # adjust prediction: needed because the training targets have been normalizaed y_pred = y_pred * float(y_train_std) + y_train_mean y_pred_test = y_pred_test * float(y_train_std) + y_train_mean # root mean squared error on validation rmse = np.sqrt(mean_squared_error(y_val, y_pred)) perf_all_val.append(rmse) # root mean squared error in test rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test)) perf_all_test.append(rmse_test) print "The performance on the validation set is: %3f" % rmse
# Load the "default + chromatic" music-origin dataset (last two columns
# are the lat/lon target pair), split train/test, scale features, and
# compare linear KRR against a random forest on geolocation error.
# NOTE(review): percent, train_matrix1/test_matrix1, train_labels1/
# test_labels1, geodistance and preprocessing are defined earlier.
data2 = [ ( [ float(row[i]) for i in range(len(row) - 2) ] , ( float( row[ len(row) - 2 ] ) , float( row[ len(row) - 1 ] ) ) ) for row in csv.reader( open("default_plus_chromatic_features_1059_tracks.txt"), delimiter=',', quoting=csv.QUOTE_NONE) ]
np.random.seed(0)
np.random.shuffle( data2 )
train_size2 = int(len(data2) * percent)
# NOTE(review): the [train_size2:-1] slices silently drop the final
# record from the test split — looks unintentional; confirm before
# changing.
train_matrix2 = np.array( [ features for ( features, label ) in data2[0:train_size2] ] )
test_matrix2 = np.array( [ features for ( features, label ) in data2[train_size2:-1] ] )
train_labels2 = [ label for ( features , label ) in data2[0:train_size2] ]
test_labels2 = [ label for ( features , label ) in data2[train_size2:-1] ]
train_matrix2 = preprocessing.scale( train_matrix2 )
test_matrix2 = preprocessing.scale( test_matrix2 )
print ("")
print ("Method = Linear ridge regression - Default features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Method = Linear ridge regression - Default features + chromatic features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix2 , train_labels2 )
results = model.predict( test_matrix2 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )
print ("")
print ("Method = Random forest regression - Default features")
model = RandomForestRegressor( n_estimators=100 , random_state=0 )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
class RidgeMKL:
    """A MKL model in a transductive setting (test points are presented
    at training time).

    Learns kernel combination weights with one of the registered MKL
    methods, then fits a kernel ridge model on the combined kernel.
    """

    # Registered MKL weight-learning methods (full-rank / low-rank).
    mkls = {
        "align": Align,
        "alignf": Alignf,
        "alignfc": Alignf,
        "uniform": UniformAlignment,
    }
    mkls_low_rank = {
        "align": AlignLowRank,
        "alignf": AlignfLowRank,
        "alignfc": AlignfLowRank,
        "uniform": UniformAlignmentLowRank,
    }
    # alignf expects kernels to be centered
    centered = {"alignf", "alignfc"}
    supervised = {"align", "alignf", "alignfc"}

    def __init__(self, lbd=0, method="align", method_init_args=None, low_rank=False):
        """
        :param method: (``string``) "align", "alignf", or "uniform", MKL method to be used.

        :param low_rank: (``bool``) Use low-rank approximations.

        :param method_init_args: (``dict``) Initialization arguments for the MKL methods.

        :param lbd: (``float``) L2-regularization.
        """
        self.method = method
        # Fix: avoid the mutable-dict default argument; copy before any
        # modification so the caller's dict is never mutated.
        init_args = dict(method_init_args) if method_init_args else {}
        if method == "alignfc":
            # "alignfc" is Alignf constrained to convex combinations.
            # (The original constructed the model twice for this case.)
            init_args["typ"] = "convex"
        models = self.mkls_low_rank if low_rank else self.mkls
        self.mkl_model = models[method](**init_args)
        self.lbd = lbd
        self.low_rank = low_rank
        self.trained = False

    def fit(self, Ks, y, holdout=None):
        r"""Learn weights for kernel matrices or Kinterfaces.

        :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

        :param y: (``numpy.ndarray``) Class labels :math:`y_i \in {-1, 1}` or regression targets.

        :param holdout: (``list``) List of indices to exclude from alignment.
        """
        # Expand kernel interfaces to kernel matrices; materialize as a
        # list so it can be indexed and iterated more than once (the
        # original `map` breaks both under Python 3).
        Hs = [K[:, :] if isinstance(K, Kinterface) else K for K in Ks]

        # Assert correct dimensions
        assert Ks[0].shape[0] == len(y)

        # Fix: an omitted holdout previously crashed on set(None) below.
        holdout_set = set() if holdout is None else set(holdout)

        # Fit MKL model (pass holdout through unchanged for supervised
        # methods, matching the original call).
        if self.method in self.supervised:
            self.mkl_model.fit(Hs, y, holdout=holdout)
        else:
            self.mkl_model.fit(Hs)

        if self.low_rank:
            # Weighted concatenation of low-rank factors; sqrt(mu) so the
            # implicit linear kernel carries weight mu.
            self.X = hstack([sqrt(mu) * H for mu, H in zip(self.mkl_model.mu, Hs)])
            if self.method in self.centered:
                self.X = center_kernel_low_rank(self.X)
                self.X[where(isnan(self.X))] = 0
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd, kernel="linear")
            # Fit ridge on the examples minus the holdout set
            # (sorted for a deterministic row order).
            inxs = sorted(set(range(Hs[0].shape[0])) - holdout_set)
            self.ridge.fit(self.X[inxs], y[inxs])
        else:
            # Fit ridge model with given lbd and the MKL model acting as
            # a callable kernel over sample indices.
            self.ridge = KernelRidge(alpha=self.lbd, kernel=self.mkl_model)
            # Fit ridge on the examples minus the holdout set
            inxs = array(sorted(set(range(Hs[0].shape[0])) - holdout_set))
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            self.ridge.fit(inxs, y[inxs])
        self.trained = True

    def predict(self, inxs):
        """
        Predict values for data on indices inxs (transductive setting).

        :param inxs: (``list``) Indices of samples to be used for prediction.

        :return: (``numpy.ndarray``) Vector of prediction of regression targets.
        """
        assert self.trained
        if self.low_rank:
            return self.ridge.predict(self.X[inxs])
        else:
            inxs = array(inxs)
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            return self.ridge.predict(inxs).ravel()
############## Prediction and save to file #################################### import os try: os.remove("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc") except OSError: pass ncfile2 = Dataset("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc", "w") ncfile1 = Dataset("/data/ISOTROPIC/data/data_downsampled4.nc", "r") # create the dimensions ncfile2.createDimension("Nt", Nt) ncfile2.createDimension("Nz", Nh) ncfile2.createDimension("Ny", Nh) ncfile2.createDimension("Nx", Nh) # create the var and its attribute var = ncfile2.createVariable("Urec", "d", ("Nt", "Nz", "Ny", "Nx")) for t in range(Nt): print("3D snapshot:", t) for i in range(Nh): xl = np.array(ncfile1.variables["velocity_x"][t, 0:Nh:sspacing, 0:Nh:sspacing, i]) # load only LR xl = np.divide(np.reshape(xl, (1, Nl * Nl)) - mea_l, sig_l) # pre-normalize xrec = np.multiply(kr.predict(xl), sig_h) + mea_h # re-normalize the prediction var[t, :, :, i] = np.reshape(xrec, (Nh, Nh)) # put to netcdf file # Close file ncfile1.close() ncfile2.close()
# Fit a polynomial kernel ridge model mapping word2vec embeddings to
# affective (valence/arousal/dominance) scores, extend the affective
# lexicon to every token in the input text, then annotate the document
# with Stanford CoreNLP.
# NOTE(review): Python 2 code (print >>, has_key); affective, keras,
# max_words, sys, re are defined earlier; this excerpt is truncated
# mid-loop at the end.
embeddings = Word2Vec.load_word2vec_format( "GoogleNews-vectors-negative300.bin.gz" , binary=True )
train_matrix = [ ]
train_labels = [ ]
for word,scores in affective.items():
    try:
        train_matrix.append( embeddings[word] )
        train_labels.append( scores )
    except:
        # Word not in the embedding vocabulary; skip it.
        continue
model = KernelRidge( kernel='poly' , degree=4 )
model.fit( train_matrix , train_labels )
textdata = " ".join( open(sys.argv[1] + ".revised.txt",'r').readlines( ) )
tokenizer = Tokenizer(nb_words=max_words, filters=keras.preprocessing.text.base_filter(), lower=True, split=" ")
tokenizer.fit_on_texts( textdata )
for word, index in tokenizer.word_index.items():
    try:
        # NOTE(review): `embedding[word]` looks like a typo for
        # `embeddings[word]` — as written the bare except below swallows
        # the NameError and every word falls back to the neutral
        # (5, 5, 5) scores. Verify against the original project.
        if not affective.has_key(word) :
            affective[word] = np.array( model.predict( np.array( embedding[word] ).reshape(1, -1) )[0] )
    except:
        affective[word] = np.array( [ 5.0 , 5.0 , 5.0 ] )
# Process the textual contents
textdata = ""
file1 = open(sys.argv[1] + ".revised.txt",'r')
with file1 as myfile:
    textdata = re.sub( ">", ">" , re.sub("<" , "<" , re.sub( "&" , "&" , re.sub( " +", "\n\n" , re.sub( "\t" , " ", re.sub( "\r" , "" , "".join( myfile.readlines() ) ) ) ) ) ) )
corenlp = StanfordCoreNLP( )
file2 = open(sys.argv[1] + ".annotated.tsv",'w')
file3 = open(sys.argv[1] + ".annotated.xml",'w')
print >>file2, "PARAGRAPH NUMBER\tENTITY TYPE\tENTITY\tCO-OCCURRING NOUNS\tCO-OCCURRING ADJECTIVES\tCO-OCCURRING VERBS\tVALENCE\tAROUSAL\tDOMINANCE\tSENTENCE"
print >>file3, "<document name='" + sys.argv[1] + "'>"
parnum = 0
sys.stdout.write("Processing text...")
try:
    for paragraph in re.split("\n\n", textdata):
def parametrize_environment_specific(settings, rerun):
    """Build or reload every artifact for one embedding channel:
    SOAP descriptors, kernel matrix, regression targets, KRR weights
    and per-structure attribution ranges.

    Each artifact is recomputed when `rerun` is True or its file is
    missing; otherwise it is loaded from disk.

    :param settings: dict with "embedding_options", "paths",
        "kernel_options", "regression_options", "soap_options_ref".
    :param rerun: force recomputation of all cached artifacts.

    NOTE(review): Python 2 code (iteritems); relies on module globals
    SETTINGS, PATH, log, soap, copy, os, np, json and helpers
    soap_configure_default, soap_evaluate, kernel_evaluate,
    kernel_attribute defined elsewhere in the file.
    """
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl
    # PATHS - for example:
    # { "xyz_file": "data_esol/structures.xyz",
    #   "soap_file": "data_esol/structures.soap",
    #   "kmat_file": "data_esol/kernel.npy",
    #   "targets_file": "data_esol/targets.npy",
    #   "range_file": "data_esol/range.json",
    #   "weights_file": "data_esol/weights.npy" }
    paths = copy.deepcopy(settings["paths"])
    for p,v in paths.iteritems():
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])
    # SOAP descriptors
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])
    # KERNEL matrix between structures
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])
    # TARGETS read from the xyz metadata
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])
    # MODEL: precomputed-kernel ridge regression; the kernel is raised
    # elementwise to the power xi before fitting.
    regr_options = settings["regression_options"]
    if rerun or not os.path.isfile(paths["weights_file"]):
        log << "Make target: %s" % paths["weights_file"] << log.endl
        y_avg = np.average(targets)
        krr = KernelRidge(
            alpha=regr_options["lreg"],
            kernel='precomputed')
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        # Dual coefficients serve as per-structure kernel weights.
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])
    # Per-structure attribution ranges
    if rerun or not os.path.isfile(paths["range_file"]):
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights, regr_options["xi"])
        json.dump(delta_Ys, open(paths["range_file"], "w"))
    else:
        delta_Ys = json.load(open(paths["range_file"]))
# Incremental-folders cross-validation: train on the first i folders
# and report RMSE separately for each held-out folder.
# NOTE(review): X, Y, index, model, rmse_list, mean_squared_error and
# plt are defined earlier; the trailing plotting loop is truncated.
for i in [t+1 for t in list(range(4))]:
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    #train index starts with the first folder
    index_train = index[folder_train];
    index_test = [element for k, element in enumerate(index) if k not in to_exclude]
    #train set starts with the first folder
    X_train = X[np.hstack(index_train)]
    Y_train = Y[np.hstack(index_train)]
    model.fit(X_train, Y_train)
    rmse_folder = []
    for item in index_test:
        folder_X = X[item]
        folder_Y = Y[item]
        # train on training sets
        Y_test_Pred = model.predict(folder_X)
        rmse = np.sqrt(mean_squared_error(folder_Y, Y_test_Pred))
        rmse_folder.append(rmse)
    print(rmse_folder)
    rmse_list.append(rmse_folder)
print(rmse_list)
print("\n")
#Plot
sub = [221, 222, 223, 224]
for i in list(range(4)):
    y = np.asarray(rmse_list[i])
# Kernel-ridge fit of predictand vs predictor with optional grid search.
# NOTE(review): this excerpt starts inside an if/else — the opening
# `if` (selecting the grid-search branch) is above this excerpt, as are
# alphaVec, sigmaVec, predictor, predictand, predictor_grid, dt,
# dB_shift_hr and maxLeadTimeHours.
    param_grid = {"alpha": alphaVec, "kernel": [RBF(length_scale) for length_scale in sigmaVec]}
    kr = KernelRidge()
    kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
else:
    # Run with pre-defined parameter set
    kr = KernelRidge(alpha=alphaVec[0], kernel='rbf', gamma=sigmaVec[0])

# Fit model
kr.fit(predictor.reshape(-1,1), predictand.reshape(-1,1))

# Get best parameters
# NOTE(review): best_params_ exists only on the GridSearchCV branch;
# this line raises AttributeError on the plain-KernelRidge branch —
# confirm intended usage.
bestAlpha_kr = kr.best_params_['alpha']
bestSigma_kr = kr.best_params_['kernel'].length_scale

# Predict over grid
kr_fit = kr.predict(predictor_grid.reshape(-1,1))

# Compute derivatives of prediction
kr_der1 = np.gradient(kr_fit[:,0])
kr_der2 = np.gradient(kr_der1)

# Estimate decorrelation time KR
if bestSigma_kr >= 2:
    minDer1 = 0.005 #0.001
else:
    minDer1 = 0.0
minNormSpread = 0.75
# Immediately overwritten by the data-driven estimate below.
minNormSpread = 0.75*np.nanmedian(dt.from_dB(predictand)[dt.from_dB(predictor)+dB_shift_hr >= maxLeadTimeHours/2])
print('Minimum spread to reach:', minNormSpread)
minNormSpread_dB = dt.to_dB(minNormSpread)
#MSE for SGD 292.104437304 #R2 for SGD 0.954873464267''' ####Develop models using various tuned algorithms above lr = LinearRegression() lr.fit(x_train, y_train) y_predicted = lr.predict(x_test) svr = SVR(C=10, gamma =1, kernel = 'linear') svr.fit(x_train_scaled, y_train) y2 = svr.predict(x_test_scaled) kr = KernelRidge(alpha=0.0001, coef0=1, degree=1, gamma=0.001, kernel='rbf',kernel_params=None) kr.fit(x_train_scaled, y_train) y3 = kr.predict(x_test_scaled) lasso = Lasso(alpha=1e-09) lasso.fit(x_train_scaled, y_train) y4 = lasso.predict(x_test_scaled) linear_ridge = Ridge(alpha=0.1) linear_ridge.fit(x_train_scaled,y_train) y5 = linear_ridge.predict(x_test_scaled) bayesian_ridge = BayesianRidge(alpha_1=1e-05, alpha_2=10, lambda_1=10, lambda_2=1e-05) bayesian_ridge.fit(x_train_scaled, y_train) y6 = bayesian_ridge.predict(x_test_scaled) sgd = SGDRegressor(alpha=0.1, epsilon=0.001, l1_ratio=0.2, loss='squared_loss', penalty='none', power_t=0.2) sgd.fit(x_train_scaled, y_train)
# Timing comparison of SVR vs KernelRidge: fit both on the first
# train_size samples, time predictions over X_plot, then plot relative
# errors and export predictions to Excel.
# NOTE(review): svr, X, y, z (reference values), X_plot, train_size,
# gamma1, alpha, time, pd and sns are defined earlier in the file.
kr = KernelRidge(kernel='rbf', gamma=gamma1,alpha = alpha)
t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0
t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0
xk = np.arange(18630+1440)[:,None]

#############################################################################
# look at the results
# Relative absolute error of each model against the reference z.
err1 = np.abs(svr.predict(X)-z)/z
err2 = np.abs(kr.predict(X)-z)/z
x1 = X.flatten()
x2 = x1
x1 = pd.DataFrame({'x':x1,'svr error %':err1,'kr error %':err2})
x2 = pd.DataFrame({'svr predict':y_svr,'kr predict': y_kr})
x2.to_excel('/users/xuguodong/desktop/data1/solution results.xls')
x1 = pd.melt(x1, id_vars=["x"], var_name="condition")
sns.lmplot(data = x1, x = 'x', y = 'value', hue = 'condition', ci=None, scatter_kws={"s": 80},lowess = True)
sv_ind = svr.support_
from molml.kernel import AtomKernel

from utils import load_qm7


if __name__ == "__main__":
    # Load the QM7 dataset (boilerplate helper).
    Xin_train, Xin_test, y_train, y_test = load_qm7()

    # Keep the run fast: restrict to a small subset of each split.
    n_train, n_test = 200, 200
    Xin_train, y_train = Xin_train[:n_train], y_train[:n_train]
    Xin_test, y_test = Xin_test[:n_test], y_test[:n_test]

    # Kernel/ridge hyperparameters.
    kernel_gamma = 1e-7
    ridge_alpha = 1e-7

    # Precompute atom-kernel Gram matrices for train and test sets.
    atom_kernel = AtomKernel(gamma=kernel_gamma,
                             transformer=LocalEncodedBond(n_jobs=-1),
                             n_jobs=-1)
    gram_train = atom_kernel.fit_transform(Xin_train)
    gram_test = atom_kernel.transform(Xin_test)

    # Kernel ridge regression on the precomputed Gram matrices.
    model = KernelRidge(alpha=ridge_alpha, kernel="precomputed")
    model.fit(gram_train, y_train)

    # Mean absolute error on both splits.
    train_error = MAE(model.predict(gram_train), y_train)
    test_error = MAE(model.predict(gram_test), y_test)
    print("Train MAE: %.4f Test MAE: %.4f" % (train_error, test_error))
    print()
# Fit smooth RBF kernel-ridge curves through two point sets (x/y and
# px/py) and prepare a plot of both.
# NOTE(review): fin (open file of tab-separated pairs), x, y, px, py
# and plt are defined earlier; the trailing triple-quoted note is
# truncated by this excerpt.
for l in fin:
    p = l.strip().split("\t")
    px.append(float(p[0]))
    py.append(float(p[1]))
ny = np.array(y)
nx = np.array(x)
pnx = np.array(px)
pny = np.array(py)
kr = KernelRidge(kernel='rbf', gamma=7.5e-5, alpha=0.001)
kr.fit(nx[:, None], ny[:, None])
# Dense grid over the first point set for a smooth curve.
x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = kr.predict(x_pred)
# Refit the same estimator on the second point set.
kr.fit(pnx[:, None], pny[:, None])
px_pred = np.linspace(min(px), max(px), 10000)[:, None]
py_pred = kr.predict(px_pred)
fig = plt.figure()
ax = fig.add_subplot(111)
"""
These regions come from http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2562909/
v1: 66-99
v2: 137-242
v3: 433-497