class KRR_calibration:
    def __init__(self):
        self.model = 'KRR'

    def fit(self, X, p, Y, kernel_function='rbf', **kwargs):

        from sklearn.kernel_ridge import KernelRidge

        check_attributes(X, Y)

        self.model = KernelRidge(kernel=kernel_function, **kwargs)

        observed_bias = Y - p

        self.model.fit(X, observed_bias)

        return self.model

    def predict(self, X, p=None, mode='prob'):

        if mode == 'bias':
            return self.model.predict(X)
        elif mode == 'prob':
            if p is None:
                raise ValueError("Mode 'prob' requires the uncalibrated probabilities p.")
            return self.model.predict(X) + p.flatten()
        else:
            raise ValueError("Mode %s is not defined." % mode)
Code example #2
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


def ridgeReg(X, y):

    X_train, X_test, y_train, y_test = train_test_split(np.array(X)[:, 6:],
                                                        np.array(y),
                                                        test_size=0.20,
                                                        random_state=1)
    regr = KernelRidge(alpha=10, kernel="polynomial", gamma=0.5)
    regr.fit(X_train, y_train)
    y_pred = regr.predict(X_test)

    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % r2_score(y_test, y_pred))

    # How large is the train/test gap?
    y_pred_train = regr.predict(X_train)
    print("Mean squared error on the training set: %.2f" %
          mean_squared_error(y_train, y_pred_train))
    print("Mean squared error on the test set:     %.2f" %
          mean_squared_error(y_test, y_pred))
    print("size of X = ", str(len(y)))
Code example #3
def choose_krr_alpha(train_x, test_x, train_y, test_y):
    alphas = [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    alpha_scores = []
    best_a_score = 0.0
    best_a = None

    for a in alphas:
        krr = KernelRidge(kernel="laplacian", alpha=a)
        krr.fit(train_x, train_y)
        score = krr.score(test_x, test_y)  # R^2 on the held-out set
        if score > best_a_score:
            best_a_score = score
            best_a = a
        alpha_scores.append(score)

    print(alpha_scores)
    print("Best alpha: " + str(best_a))
    print("Score received: " + str(best_a_score))

    plt.plot(alphas, alpha_scores)
    plt.xlabel('Alpha')
    plt.ylabel('Score')
    plt.title('Tuning Alpha Hyperparameter for KRR')
    plt.show()
Code example #4
def kernel_ridge(trainData_x, trainData_y, testData_x,
                 testData_y):  # Kernel ridge regression
    classifier = KernelRidge()
    classifier = classifier.fit(trainData_x, trainData_y)
    y_pred = classifier.predict(testData_x)
    y_train_pred = classifier.predict(trainData_x)
    results(testData_y, y_pred, trainData_y, y_train_pred, "KernelRidge")
Code example #5
def choose_krr_gamma(train_x, test_x, train_y, test_y):
    gammas = [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    gamma_scores = []
    best_g_score = 0.0
    best_g = None

    for g in gammas:
        krr = KernelRidge(kernel="laplacian", gamma=g)
        krr.fit(train_x, train_y)
        score = krr.score(test_x, test_y)  # R^2 on the held-out set
        if score > best_g_score:
            best_g_score = score
            best_g = g
        gamma_scores.append(score)

    print(gamma_scores)
    print("Best gamma: " + str(best_g))
    print("Score received: " + str(best_g_score))

    plt.plot(gammas, gamma_scores)
    plt.xlabel('Gamma')
    plt.ylabel('Score')
    plt.title('Tuning Gamma Hyperparameter for KRR')
    plt.show()
Code example #6
def prin(X, y, file, dic):
    t = 100
    #clf = MLPRegressor(solver=dic['solver'], activation=dic['activation'], hidden_layer_sizes=eval(dic['hls']), batch_size=dic['batch_size'], max_iter=dic['max_iter'])
    #clf = LinearRegression()
    clf = KernelRidge(alpha=0.001, kernel='laplacian')  # degree is ignored by the laplacian kernel
    # sklearn.cross_validation was removed; train_test_split now lives in sklearn.model_selection
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=float(dic['test_size']))
    clf.fit(X_train, y_train)

    print('Training size', len(X_train))
    print('Testing size', len(X_test))
    #scores = cross_val_score(clf, X, y, cv=5)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    accuracy = clf.score(X_train, y_train)  # R^2 on the training set
    print('accuracy', accuracy, '\n')
    print('RMSE', math.sqrt(metrics.mean_squared_error(y_test, clf.predict(X_test))))
    MAE = metrics.mean_absolute_error(y_test, clf.predict(X_test))
    print('MAE', MAE)
    #X_test, y_test = X[-t:], y[-t:]
    #file = file[-t:]
    pr = clf.predict(X_test)
    print('Filename                 Percentage Error         Actual Value      Predicted Value           Difference\n')
    for i in range(len(y_test)):
        if y_test[i] == 0.0:
            y_test[i] = 0.0000001  # avoid division by zero in the percentage error
        predi = str(round(((pr[i] - y_test[i]) / y_test[i]) * 100, 2)) + ' %'
        print('%-20s %20s %20s %20s %20s' % (file[i], predi, y_test[i],
                                             round(pr[i], 2), round(y_test[i] - pr[i], 4)))
    #print('Mean square Error', mean_squared_error(X, pr))
    #print('R2 score', r2_score(X, pr))
    #plot_g(clf)
    return MAE
Code example #7
	def KRR_CV(self, trainX, testX, trainY, testY):
		kernel_vals = ['rbf', 'laplacian']
		kernel_indices = [0,1]
		inverse_gamma_vals = [1.0, 10.0, 20.0, 40.0, 80.0]
		alpha_vals = [0.0001, 0.001, 0.01, 0.1, 1.0]
		cv_errors = np.empty([len(kernel_vals)*len(inverse_gamma_vals)*len(alpha_vals), 4])
		i = 0
		for kern in kernel_vals:
			for g in inverse_gamma_vals:
				for a in alpha_vals:
					errors = np.empty([self.cv_split_no, 1])
					kf = KFold(n_splits=self.cv_split_no, random_state=30, shuffle=True)
					j = 0
					for train_indices, validation_indices in kf.split(trainX):
						training_set_X, validation_set_X = trainX[train_indices], trainX[validation_indices]
						training_set_Y, validation_set_Y = trainY[train_indices], trainY[validation_indices]
						regr = KernelRidge(alpha=a, gamma=1.0/g, kernel=kern)
						regr.fit(training_set_X, training_set_Y)
						predY = regr.predict(validation_set_X)
						errorY = np.absolute(predY - validation_set_Y)
						errors[j] = np.mean(errorY)
						j = j + 1
					cv_errors[i,:] = kernel_indices[kernel_vals.index(kern)], g, a, np.mean(errors)
					i = i + 1
		k_opt, g_opt, a_opt, _ = cv_errors[np.argmin(cv_errors[:, 3]), :]
		k_opt = kernel_vals[kernel_indices.index(k_opt)]
		regr = KernelRidge(alpha=a_opt, gamma=1.0/g_opt, kernel=k_opt)
		regr.fit(trainX, trainY)
		predY = regr.predict(testX)
		err_on_opt_params = np.absolute(predY - testY)                 
		return err_on_opt_params
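The hand-rolled loop in KRR_CV can also be written with scikit-learn's GridSearchCV; a sketch with the same kernel/gamma/alpha grid, using mean absolute error for selection (trainX/trainY as above):

from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV

param_grid = {
    "kernel": ["rbf", "laplacian"],
    "gamma": [1.0 / g for g in [1.0, 10.0, 20.0, 40.0, 80.0]],
    "alpha": [0.0001, 0.001, 0.01, 0.1, 1.0],
}
search = GridSearchCV(KernelRidge(), param_grid,
                      scoring="neg_mean_absolute_error", cv=5)
search.fit(trainX, trainY)
regr = search.best_estimator_  # refit on the full training set by default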
Code example #8
    def train_krrl_linear(self, data):
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion
        #print("El set de train tiene {} filas y {} columnas".format(x_tr.shape[0],x_tr.shape[1]))
        #print("El set de validacion tiene {} filas y {} columnas".format(x_val.shape[0],x_val.shape[1]))

        print('Start training KernerRidge with linear kernel...')
        start_time = self.timer()

        krrl = KernelRidge(alpha=1)
        krrl.fit(x_tr, y_tr)
        print("The R2 is: {}".format(krrl.score(x_tr, y_tr)))
        #		print("The alpha choose by CV is:{}".format(krrl.alpha_))
        self.timer(start_time)

        print("Making prediction on validation data")
        y_val = np.expm1(y_val)
        y_val_pred = np.expm1(krrl.predict(x_val))
        mae = mean_absolute_error(y_val, y_val_pred)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        os.makedirs('pickles', exist_ok=True)

        with open('pickles/krrlLinearK.pkl', 'wb') as f:
            pickle.dump(krrl, f)

        print('Making prediction and saving into a csv')
        y_test = krrl.predict(self.x_test)

        return y_test
Code example #9
File: KernelRR.py Project: the-hktran/6.862Project
def RunKernel(XTrain, YTrain, XVal, YVal, XTest, YTest):
    print("Optimizing Kernel Ridge Regression Parameters")
    #BestAlpha, BestGamma = DoGridSearch(XTrain, YTrain.ravel())
    BestAlpha = 0.01
    BestGamma = 0.001
    KRR = KernelRidge(kernel='laplacian', gamma=BestGamma, alpha=BestAlpha)
    KRR.fit(XTrain, YTrain.ravel())

    YPredTrain = KRR.predict(XTrain)
    DiffYTrain = abs(YPredTrain - YTrain.ravel())
    print(sum(DiffYTrain) / float(len(DiffYTrain)))

    YPred = KRR.predict(XTest)
    DiffY = abs(YPred - YTest.ravel())
    MAEPredicted = sum(DiffY) / float(len(DiffY))
    print(BestAlpha, BestGamma)
    print(MAEPredicted)

    plt.scatter(YTest.tolist(), YPred.tolist(), c='red', s=5)
    plt.plot(np.linspace(0, 0.5, 2), np.linspace(0, 0.5, 2))
    plt.ylabel('Predicted Excitation Energy (a.u.)')
    plt.xlabel('True Excitation Energy (a.u.)')
    plt.title(
        'Kernel Ridge Regression (Laplacian) Learned Excitation Energies')
    plt.show()


#RunKernel()
Code example #10
def choose_krr_kernel(train_x, test_x, train_y, test_y):
    kernels = ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid']
    kernel_scores = []
    best_k_score = 0.0
    best_k = ""

    for k in kernels:
        krr = KernelRidge(kernel=k)
        krr.fit(train_x, train_y)
        score = krr.score(test_x, test_y)  # R^2 on the held-out set
        if score > best_k_score:
            best_k_score = score
            best_k = k
        kernel_scores.append(score)

    print(kernel_scores)
    print("Best kernel: " + str(best_k))
    print("Score received: " + str(best_k_score))

    plt.bar(kernels, kernel_scores)
    plt.xlabel('Kernel')
    plt.ylabel('Score')
    plt.xticks(np.arange(len(kernels)), kernels)
    plt.title('Tuning Kernel Hyperparameter for KRR')
    plt.show()
Code example #11
def kernel_ridge(trainData_x, trainData_y, testData_x, testData_y,
                 COSMIC_num):  # Kernel ridge regression
    classifier = KernelRidge(alpha=0.1)
    classifier = classifier.fit(trainData_x, trainData_y)
    y_pred = classifier.predict(testData_x)
    y_train_pred = classifier.predict(trainData_x)
    results(testData_y, y_pred, trainData_y, y_train_pred, "KernelRidge",
            COSMIC_num)
Code example #12
class KernelRidgeRegression(Oracle):
    def __init__(self, kernel='rbf'):
        self.kernel = kernel
        self.model = KernelRidge(alpha=1,
                                 kernel=kernel,
                                 gamma=None,
                                 degree=5,
                                 coef0=1,
                                 kernel_params=None)

    def predict(self, X_nxp: np.ndarray):
        return self.model.predict(X_nxp), self.oracle_std * np.ones(
            (X_nxp.shape[0]))

    def fit(self,
            X_nxp: np.ndarray,
            gt_n: np.ndarray,
            weights_n: np.ndarray = None,
            k_estimate_var: int = 4,
            epochs: int = None,
            seed: int = None,
            verbose: bool = False):
        if weights_n is None:
            weights_n = np.ones([gt_n.size])

        # ------ fit oracle variance -----
        kf = KFold(n_splits=k_estimate_var, shuffle=True)
        kf.get_n_splits(X_nxp)
        oracle_var = 0.0
        for k, idx in enumerate(kf.split(X_nxp)):
            train_idx, val_idx = idx
            xtr_nx1, xval_nx1 = X_nxp[train_idx], X_nxp[val_idx]
            ytr_n, yval_n = gt_n[train_idx], gt_n[val_idx]
            wtr_n, wval_n = weights_n[train_idx], weights_n[val_idx]
            self.model.fit(xtr_nx1, ytr_n, sample_weight=wtr_n)
            oracle_var += np.mean(
                wval_n * np.square(self.model.predict(xval_nx1) - yval_n))
        oracle_var /= float(k_estimate_var)
        oracle_std = np.sqrt(oracle_var)
        self.oracle_std = oracle_std
        self.model.fit(X_nxp, gt_n, sample_weight=weights_n)

    def get_parameters(self):
        return self.model, self.oracle_std

    def set_parameters(self, value):
        self.model = deepcopy(value[0])
        self.oracle_std = value[1]

    parameters = property(get_parameters, set_parameters)

    def get_initialization_kwargs(self):
        return {'kernel': self.kernel}

    def save(self, savepath: str):
        print("Not saving KernelRidgeRegression.")
Code example #13
def krr_base_model():
    maes = []
    rmses = []
    submission = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'),
                             index_col='seg_id')
    scaled_train_X = pd.read_csv('./result/scaled_train_X.csv')
    scaled_test_X = pd.read_csv('./result/scaled_test_X.csv')
    train_y = pd.read_csv('./result/train_y.csv')
    predictions = np.zeros(len(scaled_test_X))

    n_fold = 8
    folds = KFold(n_splits=n_fold, shuffle=True, random_state=42)

    fold_importance_df = pd.DataFrame()
    fold_importance_df["Feature"] = scaled_train_X.columns

    for fold_, (trn_idx, val_idx) in enumerate(
            folds.split(scaled_train_X, train_y.values)):
        print('working fold %d' % fold_)
        strLog = "fold {}".format(fold_)
        print(strLog)

        X_tr, X_val = scaled_train_X.iloc[trn_idx], scaled_train_X.iloc[
            val_idx]
        y_tr, y_val = train_y.iloc[trn_idx], train_y.iloc[val_idx]
        y_tr = y_tr['time_to_failure']
        y_val = y_val['time_to_failure']

        model = KernelRidge(kernel='rbf', alpha=0.001, gamma=0.001)
        model.fit(X_tr, y_tr)

        # predictions
        preds = model.predict(scaled_test_X)
        predictions += preds / folds.n_splits
        preds = model.predict(X_val)

        # mean absolute error
        mae = mean_absolute_error(y_val, preds)
        print('MAE: %.6f' % mae)
        maes.append(mae)

        # root mean squared error
        rmse = np.sqrt(mean_squared_error(y_val, preds))
        print('RMSE: %.6f' % rmse)
        rmses.append(rmse)

        # fold_importance_df['importance_%d' % fold_] = model.feature_importances_[:len(scaled_train_X.columns)]

    print('MAEs', maes)
    print('MAE mean: %.6f' % np.mean(maes))
    print('RMSEs', rmses)
    print('RMSE mean: %.6f' % np.mean(rmses))

    submission['time_to_failure'] = predictions
    submission.to_csv('submission_krr_8.csv')
    return predictions
Code example #14
class VADEstimator(BaseEstimator):
  def fit( self, x , y , size=1 ):
    # The MLP below is assembled but immediately overwritten by KernelRidge;
    # the Sequential branch in predict() only runs if that reassignment is removed.
    self.model = Sequential()
    self.model.add(Dense( int( embeddings_dim / 2.0 ) , input_dim=embeddings_dim , init='uniform' , activation='tanh'))
    self.model.add(Dense( int( embeddings_dim / 4.0 ) , init='uniform' , activation='tanh'))
    self.model.add(Dense(size , init='uniform' ) )
    self.model.compile(loss='mse', optimizer='rmsprop')
    self.model = KernelRidge( kernel='rbf' )
    self.model.fit( x , y )
  def predict( self, x ): 
    if isinstance( self.model , Sequential ): return self.model.predict( x , verbose=0 )[ 0 ]
    return self.model.predict( x )
Code example #15
def krr_linear(i, train, test, M, alpha, limit=None):
    if limit:
        (Xtrain,Ytrain),(Xtest,Ytest) = train_test_build(i,train,test,M,limit)
    else:
        (Xtrain,Ytrain),(Xtest,Ytest) = train_test_build(i,train,test,M)
    
    clf = KernelRidge(alpha=alpha)
    clf.fit(Xtrain,Ytrain)
    pred_test = clf.predict(Xtest)
    pred_train = clf.predict(Xtrain)
    train_error = sum((pred_train-Ytrain)**2)
    test_error = sum((pred_test-Ytest)**2)
    
    return (train_error, test_error)
Code example #16
File: svr.py Project: czosel/aml
def kernel_ridge(X, X_test, y, params):
    alpha = params.get("alpha", 1)
    gamma = params.get("gamma", 0.1)
    kernel = params.get("kernel", "rbf")
    kf = KFold(n_splits=5, shuffle=True)
    scores = []
    pred = np.zeros(len(y))  # out-of-fold predictions
    for train, test in kf.split(X, y):
        regr = KernelRidge(alpha=alpha, kernel=kernel, gamma=gamma)
        regr.fit(X[train], y[train])
        pred[test] = regr.predict(X[test])
        scores.append(sklearn.metrics.r2_score(y[test], pred[test]))

    regr.fit(X, y)
    return regr.predict(X_test), scores, pred
Code example #17
def ridgeRegression(ATrain, performanceTrain, distortionTrain, ATest,
                    performanceTest, distortionTest):
    model = KernelRidge(alpha=0.01, kernel='sigmoid')
    model.fit(ATrain, performanceTrain)
    performancePred = model.predict(ATest)
    performanceErr = sum(
        abs(performancePred - performanceTest)) / len(performanceTest)
    print('Kernel ridge performance error: ', performanceErr)
    model.fit(ATrain, distortionTrain)
    distortionPred = model.predict(ATest)
    distortionErr = sum(
        abs(distortionPred - distortionTest)) / len(distortionTest)
    print('Kernel ridge distortion error: ', distortionErr)
    histoPlot(performancePred, performanceTest)
    histoPlot(distortionPred, distortionTest)
Code example #18
class KernelRidgeImpl():
    def __init__(self,
                 alpha=1,
                 kernel='linear',
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None):
        self._hyperparams = {
            'alpha': alpha,
            'kernel': kernel,
            'gamma': gamma,
            'degree': degree,
            'coef0': coef0,
            'kernel_params': kernel_params
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            # KernelRidge is supervised; calling fit without y will raise.
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Code example #19
def kernel_ridge_pre(X_train, y_train, X_pre, val):
    kernel = KernelRidge(kernel=val['kernel'],
                         alpha=val['alpha'],
                         gamma=val['gamma'])
    kernel.fit(X_train, y_train)
    y_pre = kernel.predict(X_pre)
    return y_pre
Code example #20
File: example.py Project: jwkvam/bowtie-demo
def mainregress(selection, alpha):
    if len(selection) < 2:
        return

    x = xdown.get()['value']
    y = ydown.get()['value']

    tabdata = []
    mldatax = []
    mldatay = []
    species = iris.Species.unique()
    for i, p in enumerate(selection['points']):
        mldatax.append(p['x'])
        mldatay.append(p['y'])
        tabdata.append({
            x: p['x'],
            y: p['y'],
            'species': species[p['curve']]
        })


    X = np.c_[mldatax, np.array(mldatax) ** 2]
    ridge = KernelRidge(alpha=alpha).fit(X, mldatay)

    xspace = np.linspace(min(mldatax)-1, max(mldatax)+1, 100)

    plot = pw.scatter(mldatax, mldatay, label='train', markersize=15)
    for i, df in iris.groupby('Species'):
        plot += pw.scatter(df[x], df[y], label=i)
    plot += pw.line(xspace, ridge.predict(np.c_[xspace, xspace**2]), label='model', mode='lines')
    plot.xlabel = x
    plot.ylabel = y
    linear.do_all(plot.dict)
    table1.do_data(pd.DataFrame(tabdata))
Code example #21
def ridge_regression(K1, K2, y1, y2, alpha, c):
    n_val, n_train = K2.shape
    clf = KernelRidge(kernel="precomputed", alpha=alpha)
    one_hot_label = np.eye(c)[y1] - 1.0 / c
    clf.fit(K1, one_hot_label)
    z = clf.predict(K2).argmax(axis=1)
    return 1.0 * np.sum(z == y2) / n_val
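ridge_regression above expects precomputed Gram matrices (K1: train vs. train, K2: validation vs. train). A sketch of how those inputs might be built with pairwise_kernels; the rbf kernel and gamma value are assumptions:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

rng = np.random.RandomState(0)
X_train, X_val = rng.rand(50, 3), rng.rand(20, 3)
y_train = rng.randint(0, 3, size=50)   # c = 3 classes
y_val = rng.randint(0, 3, size=20)

K1 = pairwise_kernels(X_train, X_train, metric="rbf", gamma=0.5)
K2 = pairwise_kernels(X_val, X_train, metric="rbf", gamma=0.5)
acc = ridge_regression(K1, K2, y_train, y_val, alpha=0.1, c=3)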
Code example #22
 def get_SVM_NTK(self, for_test: bool):
     if self.params['kernel_ridge']:
         clf = KernelRidge(alpha=self.params['ridge_coef'][0],
                           kernel="precomputed")
     else:
         clf = SVR(kernel="precomputed",
                   C=self.params['svm_coef'][0],
                   epsilon=self.params['svm_coef'][1],
                   cache_size=100000)
     output = []
     train = not for_test
     Ys_ = self.test_Ys_ if for_test else self.Ys_
     N = self.N_test if for_test else self.N_train
     for idx in range(N):
         NTK_train = self.get_ntk(fst_train=train,
                                  fst_idx=idx,
                                  fst_qry=False,
                                  snd_train=train,
                                  snd_idx=idx,
                                  snd_qry=False,
                                  ridge=True)
         NTK_test = self.get_ntk(fst_train=train,
                                 fst_idx=idx,
                                 fst_qry=True,
                                 snd_train=train,
                                 snd_idx=idx,
                                 snd_qry=False,
                                 ridge=False)
         y = Ys_[idx]
         time_evolution = self.time_evolution(NTK_train,
                                              self.params['inner_lr'])
         clf.fit(X=NTK_train, y=time_evolution @ y)
         pred = clf.predict(X=NTK_test)
         output.append(pred)
     return np.concatenate(output)
Code example #23
 def run(self, ind_sampling, ind_fold):
     if self.fold_setting=="S4":
         nb_fold = self.nb_fold * self.nb_fold
     self.load_CV_indexes(ind_sampling)
     
     if self.CV_type == 'ClusterCV_':
         ajout = self.CV_type
     else:
         ajout = 'CV_'
     
     K_train, K_test = self.make_Ktrain_and_Ktest_MT_with_settings(self.samples_tr[ind_fold], self.samples_te[ind_fold])
     pred_score = []
     for param in range(len(self.list_param)):
         if self.type_clf=="SVM":
             clf = svm.SVC(kernel='precomputed', C=self.list_param[param])
             clf.fit(K_train, self.labels_tr[ind_fold])
             Y_test_score = clf.decision_function(K_test).tolist()
         elif self.type_clf=="KernelRidge":
             clf = KernelRidge(alpha=self.list_param[param], kernel='precomputed')
             clf.fit(K_train, self.labels_tr[ind_fold])
             Y_test_score = clf.predict(K_test).tolist()
         else:
             raise ValueError('invalid value of type_clf')
         pred_score.append(Y_test_score)
         del clf
         del Y_test_score
     pickle.dump(pred_score, open('saved_results/MT/MT_'+str(self.nb_fold)+'fold'+ajout+self.fold_setting+"_"+self.type_clf+"_"+str(ind_fold)+"_"+str(ind_sampling)+".data", 'wb'))
     del K_train
     del K_test
Code example #24
def choose_alpha_ridge(X, y, range_C, gammaX, plot_color):
    '''Implement 5-fold CV to determine the optimal C (for a fixed gamma)'''
    
    #Param setup
    kf = KFold(n_splits = 5)
    mean_error=[]; std_error=[];
    
    for C in range_C:
        #Params
        mse_temp = []
        #Model
        model = KernelRidge(alpha= 1.0/(2*C), kernel= 'rbf', gamma=gammaX)    
        
        #5 fold CV           
        for train, test in kf.split(X):
            #Model
            model.fit(X[train], y[train])
            ypred = model.predict(X[test])
            mse = mean_squared_error(y[test], ypred)
            mse_temp.append(mse)
            
        #Get mean & variance
        mean_error.append(np.array(mse_temp).mean())
        std_error.append(np.array(mse_temp).std())
        
    #Plot
    fig = plt.figure(figsize=(15,12))
    plt.errorbar(range_C, mean_error, yerr=std_error, color = plot_color)
    plt.xlabel('C')
    plt.ylabel('Mean square error')
    plt.title('Choice of C in kernelised Ridge Regression - 5 fold CV, gamma = {}'.format(gammaX))
    plt.show()
Code example #25
def local_bias_estimator(X,
                         Y,
                         p,
                         X_grid,
                         model='KRR',
                         kernel_function='rbf',
                         **kwargs):

    check_attributes(X, Y)

    if model == 'KRR':
        from sklearn.kernel_ridge import KernelRidge
        model = KernelRidge(kernel=kernel_function, **kwargs)
        # kr = KernelRidge(alpha=alpha, kernel='rbf', **kwargs)
    elif model == 'SVR':
        from sklearn.svm import SVR
        model = SVR(kernel=kernel_function, **kwargs)
    elif model == 'EWF':
        K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs)
        p_err = Y - p
        bias = np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1)
        return bias
    else:
        raise ValueError("Model %s is not defined." % model)

    bias_calibration = Y - p

    model.fit(X, bias_calibration)
    bias = model.predict(X_grid)

    return bias
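A usage sketch for local_bias_estimator: 'KRR' and 'SVR' fit a regressor to the residuals Y - p, while 'EWF' returns a kernel-weighted (Nadaraya-Watson style) average of those residuals on X_grid. Synthetic inputs; check_attributes and pairwise_kernels are assumed imported as in the surrounding module:

import numpy as np

X = np.random.rand(200, 1)
p = np.random.rand(200)               # predicted probabilities
Y = (np.random.rand(200) < p) * 1.0   # observed outcomes
X_grid = np.linspace(0, 1, 50).reshape(-1, 1)

bias_krr = local_bias_estimator(X, Y, p, X_grid, model='KRR', alpha=1.0)
bias_ewf = local_bias_estimator(X, Y, p, X_grid, model='EWF', gamma=5.0)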
Code example #26
def get_reconstruction_error(ct, data, nsplits=4, clf='kridge'):
    tasknames = [i.split('.')[0] for i in data.columns]
    tasks = list(set(tasknames))
    tasks.sort()
    chosen_vars = []
    #print(ct,tasks,tasknames)
    for i in ct:
        vars = [
            j for j in range(len(tasknames))
            if tasknames[j].split('.')[0] == tasks[i]
        ]
        chosen_vars += vars
    kf = KFold(n_splits=nsplits, shuffle=True)
    fulldata = data.values
    #subdata=data.ix[:,chosen_vars].values
    if clf == 'kridge':
        linreg = KernelRidge(alpha=1)
    elif clf == 'rf':
        linreg = RandomForestRegressor()
    else:
        linreg = LinearRegression()
    scaler = StandardScaler()
    pred = numpy.zeros(fulldata.shape)
    for train, test in kf.split(fulldata):
        #fulldata_train=fulldata[train,:]
        #fulldata_test=fulldata[test,:]
        # fit scaler to train data and apply to test
        fulldata_train = scaler.fit_transform(fulldata[train, :])
        fulldata_test = scaler.transform(fulldata[test, :])
        subdata_train = fulldata_train[:, chosen_vars]
        subdata_test = fulldata_test[:, chosen_vars]
        linreg.fit(subdata_train, fulldata_train)
        pred[test, :] = linreg.predict(subdata_test)
    cc = numpy.corrcoef(scaler.transform(fulldata).ravel(), pred.ravel())[0, 1]
    return cc
Code example #27
def kernel_ridge(X_train, y_train, X_test, y_test, val):
    kernel = KernelRidge(kernel=val['kernel'],
                         alpha=val['alpha'],
                         gamma=val['gamma'])
    kernel.fit(X_train, y_train)
    y_pre = kernel.predict(X_test)
    r2, mse = show_metrics('Kernel Ridge', y_test, y_pre)
    return r2, mse
Code example #28
def test_kernel_ridge_singular_kernel():
    # alpha=0 causes a LinAlgError in computing the dual coefficients,
    # which causes a fallback to a lstsq solver. This is tested here.
    pred = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X)
    kr = KernelRidge(kernel="linear", alpha=0)
    ignore_warnings(kr.fit)(X, y)
    pred2 = kr.predict(X)
    assert_array_almost_equal(pred, pred2)
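X and y in this test are module-level fixtures in scikit-learn's test suite; a minimal stand-in to run it in isolation (note the ignore_warnings import path varies across sklearn versions):

import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.utils._testing import ignore_warnings  # sklearn.utils.testing in older releases

rng = np.random.RandomState(0)
X = rng.randn(10, 5)
y = rng.randn(10)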
Code example #30
def outofsample_extensions(method='kernel-regression'):
    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist(
        dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(
        isomap.fit_transform(train_data[:n_train_samples, :]))
    sigma = mean_data_distance(np.float32(train_data[:n_train_samples, :]))

    if method == 'kernel-regression':
        # Use kernel regression to provide baseline out-of-sample extensions
        proj = KernelRidge(kernel='rbf', gamma=(1.0 / sigma**2))
        proj.fit(np.float64(train_data[:n_train_samples, :]),
                 np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data[:n_train_samples, :]),
                           train_labels[:n_train_samples],
                           proj.predict(test_data), test_labels)
    elif method == 'cK-ISOMAP-10d' or method == 'cK-ISOMAP-20d':
        # Use the SEF to provide out-of-sample extensions
        if method == 'cK-ISOMAP-10d':
            dims = 10
        else:
            dims = 20

        proj = KernelSEF(train_data[:n_train_samples],
                         train_data.shape[1],
                         output_dimensionality=dims)
        proj.cuda()
        loss = proj.fit(data=train_data[:n_train_samples, :],
                        target_data=train_data_isomap,
                        target='copy',
                        epochs=100,
                        batch_size=128,
                        verbose=True,
                        learning_rate=0.00001,
                        regularizer_weight=0.001)
        acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]),
                           train_labels[:n_train_samples],
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
Code example #31
def AlgoKRR(df_train, df_trainY):  #
    model = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
    rmsle_cv(model, df_train, df_trainY)
    model.fit(df_train, df_trainY)
    result = model.predict(df_train)
    print("rms value of same set: ",
          np.around(sqrt(mean_squared_error(df_trainY, result)), decimals=7))
    return model
Code example #32
File: main.py Project: Learning-Kernel-Group/final-
 def uniform(self, method=None):
     tmp = self.make_test_kernels(self.x,
                                  self.x,
                                  subsampling=self.subsampling)
     mu = np.ones(self.n_features) / self.n_features
     self.gTest = self.sum_weight_kernels(tmp, mu)**self.degree
     # self.krr is assumed to be a KernelRidge model fitted elsewhere in the class.
     self.yPredictR = self.krr.predict(self.gTest)
     self.yPredictC = 2 * (self.yPredictR >= 0.) - 1
Code example #33
File: volatility.py Project: wgcgxp/zjsxzy_in_js
def train_test_model(vol_df, short_term_days, features, model, train_date,
                     test_date):
    predX = vol_df.iloc[-short_term_days - 1:][features]
    pred_end_date = w.tdaysoffset(short_term_days, predX.index[-1]).Data[0][0]
    pred_dates = w.tdays(predX.index[-1].strftime("%Y-%m-%d"),
                         pred_end_date.strftime("%Y-%m-%d")).Times
    predX.index = pred_dates

    vol_df[features] = vol_df[features].shift(short_term_days)
    vol_df.dropna(inplace=True)
    train_df = vol_df[vol_df.index <= test_date]
    test_df = vol_df[vol_df.index >= test_date]
    train_df = pd.concat([train_df, test_df.iloc[[0]]])  # so the plot looks continuous
    trainX, trainY = train_df[features], train_df['vol']
    testX, testY = test_df[features], test_df['vol']

    # Use Lasso for feature selection
    lasso = linear_model.Lasso(alpha=0.0005)
    lasso.fit(trainX, trainY)
    sfm = SelectFromModel(lasso, prefit=True)
    fea_trainX = sfm.transform(trainX)
    fea_testX = sfm.transform(testX)
    fea_predX = sfm.transform(predX)

    if model == "LinearRegression":
        reg_model = linear_model.LinearRegression()
    elif model == "KernelRidgeRegression":
        reg_model = KernelRidge(kernel='rbf')
    elif model == "SupportVectorRegression":
        reg_model = SVR(kernel="linear")
    elif model == "Ridge":
        reg_model = linear_model.Ridge()
    elif model == "RandomForestRegression":
        reg_model = RandomForestRegressor()
    elif model == "AdaBoostRegression":
        reg_model = AdaBoostRegressor()
    else:
        raise NotImplementedError

    reg_model.fit(fea_trainX, trainY)
    train_pred = reg_model.predict(fea_trainX)
    test_pred = reg_model.predict(fea_testX)
    pred_pred = reg_model.predict(fea_predX)
    return (pd.DataFrame({'pred': train_pred}, index=trainX.index),
            pd.DataFrame({'pred': test_pred}, index=testX.index),
            pd.DataFrame({'pred': pred_pred}, index=predX.index))
Code example #34
File: lgo.py Project: aatapa/RLScore
def lgo_sklearn(X,y, groups, regparam):
    logo = LeaveOneGroupOut()
    errors = []
    for train, test in logo.split(X, y, groups=groups):
        rls = KernelRidge(kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        e = sqerror(y[test], p)       
        errors.append(e)
    return np.mean(errors)
Code example #35
File: lpo.py Project: aatapa/RLScore
def lpo_sklearn(X,y, regparam):
    lpo = LeavePOut(p=2)
    preda = []
    predb = []
    for train, test in lpo.split(X):
        rls = KernelRidge(kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        preda.append(p[0])
        predb.append(p[1])
    return preda, predb
Code example #36
    def ANM_causation_score(self,train_size=0.5,independence_criterion='HSIC',metric='linear',regression_method='GP'):
        '''
            Measure how likely a given causal direction is true

            Parameters
            ----------
            train_size :
                Fraction of given data used to training phase

            independence_criterion :
                kruskal for Kruskal-Wallis H-test,
                HSIC for Hilbert-Schmidt Independence Criterion

            metric :
                linear, sigmoid, rbf, poly
                kernel function used to compute the Gram matrix for HSIC;
                a Gaussian kernel is used in:
                Nonlinear causal discovery with additive noise models,
                Patrik O. Hoyer et al.

            Returns
            -------
            causal_strength: A float between 0. and 1.
        '''
        Xtrain, Xtest , Ytrain, Ytest = train_test_split(self.X, self.Y, train_size = train_size)
        if regression_method == 'GP':
            _gp = pyGPs.GPR()      # specify model (GP regression)
            _gp.getPosterior(Xtrain, Ytrain) # fit default model (mean zero & rbf kernel) with data
            _gp.optimize(Xtrain, Ytrain)     # optimize hyperparameters (default optimizer: single run minimize)

            #Forward case
            #_gp = KernelRidge(kernel='sigmoid',degree=3)
            #_gp.fit(Xtrain,Ytrain)
            ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
            #_gp.plot()
            #errors_forward = _gp.predict(Xtest) - Ytest
            errors_forward = ym - Ytest
        else:
            _gp = KernelRidge(kernel='sigmoid')
            _gp.fit(Xtrain, Ytrain)
            errors_forward = _gp.predict(Xtest) - Ytest

        #Independence score

        forward_indep_pval = {
            'kruskal': kruskal(errors_forward,Xtest)[1],
            'HSIC': self.HilbertSchmidtNormIC(errors_forward,Xtest,metric=metric)[1]
        }[independence_criterion]


        return {'causal_strength':forward_indep_pval}
Code example #37
File: contours3.py Project: RossHart/astro_codes
def xyz_kde(xyz,gamma,N_grid=100):
    xy = xyz[:,:-1]
    z = xyz[:,-1]
    
    x_edges = np.linspace(np.min(xy[:,0]),np.max(xy[:,0]),N_grid+1)
    y_edges = np.linspace(np.min(xy[:,1]),np.max(xy[:,1]),N_grid+1)
    x_centres = np.array([x_edges[b] + (x_edges[b+1]-x_edges[b])/2 
                          for b in range(N_grid)])
    y_centres = np.array([y_edges[b] + (y_edges[b+1]-y_edges[b])/2 
                          for b in range(N_grid)])
    x_grid, y_grid = np.meshgrid(x_centres,y_centres)
    xy_grid = np.array([np.ravel(x_grid),np.ravel(y_grid)]).T
    clf = KernelRidge(kernel='rbf',gamma=gamma).fit(xy,z)
    H = clf.predict(xy_grid).reshape(N_grid,N_grid)
    return H, x_grid, y_grid, gamma
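A quick usage sketch for xyz_kde on synthetic points (the gamma value is arbitrary):

import numpy as np

xyz = np.random.rand(500, 3)   # columns: x, y, z
H, x_grid, y_grid, gamma = xyz_kde(xyz, gamma=10.0, N_grid=50)
# H is the KRR-smoothed z-surface evaluated on the 50x50 grid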
Code example #38
def plot_kernel_ridge(X, y, gamma=0.5, alpha=0.1):
    # kernel (ridge) regression
    krr = KernelRidge(kernel="rbf", gamma=gamma, alpha=alpha)
    krr.fit(X, y)

    # predict on a dense grid
    x_plot = np.linspace(min(X), max(X), 100)[:, np.newaxis]
    y_plot = krr.predict(x_plot)

    # plot
    plt.figure(figsize=(8, 4.8))
    plt.plot(X, y, 'or')
    plt.plot(x_plot, y_plot)
#     plt.title(r"Gaussian Kernel ($\gamma=%0.2f, \alpha=%0.2f$)" % (gamma,alpha), fontsize=16)
    plt.title(r"Gaussian Kernel ($\gamma=%0.2f$)" % (gamma), fontsize=16)
Code example #39
def modelfitOne(train_X, train_y, test_X, yd, ImageId, FeatureName):
    n_clf = 1
    # estimator
    clf = KernelRidge(kernel='rbf', gamma=6e-4, alpha=2e-2)
    # training
    print('-----------------start training...------------------')
    clf.fit(train_X, train_y)
    # prediction
    print('-----------------start predicting...------------------')
    pred = clf.predict(test_X)
    predicted = np.zeros(len(FeatureName))
    for i in range(len(FeatureName)):
        if i % 500 == 0:
            print('i =', i)
        else:
            pass
        imageID = ImageId[i]
        clfID = yd[FeatureName[i]]
        predicted[i] = pred[imageID, clfID]
    predicted = predicted*48.+48.
    return predicted
Code example #40
File: learner.py Project: sammystayz/sandbox
class Learner():

    path = 'matrices/'
    inputF = 'inputs.npy'
    stateF = 'states.npy'
    itrF = 'itr.npy'
    inptFile = os.path.join(path, inputF)
    stateFile = os.path.join(path, stateF)
    itrFile = os.path.join(path, itrF)

    itr = np.array([])

    useSHIV = False
    THRESH = 0.45
    ahqp_solver_g = AHQP(sigma=6)
    ahqp_solver_b = AHQP(sigma=5,nu=1e-3)


    def trainModel(self, s=None, a=None):
        """
        Trains model on given states and actions.
        Uses neural net or SVM based on global
        settings.
        """
        states, actions = self.states[3:], self.actions[3:]
        #print "states.shape"
        #print states.shape
        #print "actions.shape"
        #print actions.shape

        if len(self.itr) == 0:
            self.itr = np.array([states.shape[0]])
        else:
            self.itr = np.hstack((self.itr, states.shape[0]))

        '''if states.shape[0] > 2700.0:
            f = os.path.join(self.path, 'statesToValidate.npy')
            np.save(f, states)
            IPython.embed()'''

        
        fits = []

        #actions = actions.ravel()
        self.clf = KernelRidge(alpha=1.0)
        self.clf.kernel = 'rbf'
        print "SIZE: ", states.shape
        self.clf.fit(states, actions)
        #IPython.embed()
        actions_pred = self.clf.predict(states)
        bad_state = np.zeros(actions_pred.shape[0])
        for i in range(actions_pred.shape[0]):
            fit = LA.norm(actions_pred[i, :] - actions[i, :])
            fits.append(fit)

        med = np.median(np.array(fits))
        # flag states whose fit error exceeds the median
        for i, fit in enumerate(fits):
            if fit > med:
                bad_state[i] = 1

        #IPython.embed()  # debug hook

        if self.useSHIV:
            self.labels = np.zeros(states.shape[0])+1.0
            self.scaler = preprocessing.StandardScaler().fit(states)
            states_proc = self.scaler.transform(states)
            
            good_labels = bad_state == 0.0         
            states_g = states_proc[good_labels,:] 

            bad_labels = bad_state == 1.0 
            states_b = states_proc[bad_labels,:] 
            #IPython.embed()
            self.ahqp_solver_g.assembleKernel(states_g, np.zeros(states_g.shape[0])+1.0)
            self.ahqp_solver_b.assembleKernel(states_b, np.zeros(states_b.shape[0])+1.0)
            #IPython.embed()
            self.ahqp_solver_g.solveQP()
            self.ahqp_solver_b.solveQP()

            #score = self.clf.score(states, actions)
            #print score
        
        self.plot(fits, states, med)

    def askForHelp(self,state):
        if self.useSHIV:
            state = self.scaler.transform(state)
            if self.ahqp_solver_b.predict(state)==1.0:
                return -1.0
            else:
                return self.ahqp_solver_g.predict(state)
        else:
            return -1

    
    def plot(self, fits, states, threshold):
        index = range(len(states))
        t = np.ones(len(index)) * threshold
        plt.figure(1)
        plt.plot(index, fits, color='b', linewidth=4.0)
        plt.plot(index, t, color='r', linewidth=4.0)
        plt.ylabel('Fit')
        plt.xlabel('Index of State')

        plt.show()


    def getAction(self, state):
	"""
	Returns a prediction given the input state.
	Uses neural net or SVM based on global
	settings.
	"""

	return self.clf.predict(state)


    def initModel(self, useSHIV):
        self.useSHIV = useSHIV
        try:
            self.states = np.load(self.stateFile)
            self.actions = np.load(self.inptFile)
        except IOError:
            self.states = np.array([-8,8.75,0,-12,22,0,-15,21.13043404,
                                     0,-12,18.52173996,0,-15,14.173913,
                                     0,-12,8.08695698,0,0,0,0,0])
            self.actions = np.array([0,0,0,0])
        #self.trainModel(self.states, self.actions)

    def updateModel(self, s, a):
        self.states = np.vstack((self.states, s))
        self.actions = np.vstack((self.actions, a))
        #self.trainModel(self.states, self.actions)

    def saveModel(self):
        path = 'matrices/oldData/'
        currT = strftime("%Y-%m-%d %H:%M:%S", gmtime())

        inptFileOut = os.path.join(path, 'inputs' + currT + '.npy')
        stateFileOut = os.path.join(path, 'states' + currT + '.npy')

        np.save(stateFileOut, self.states)
        np.save(inptFileOut, self.actions)
        np.save(self.itrFile, self.itr)
Code example #41
def dfun(x):
    return 2*sp.sin(x)*sp.cos(x) + sp.exp(-sp.cos(x)) * sp.sin(x)


lengthscale = 1.
gamma = 1 / (2 * lengthscale**2)

# Note: this KernelRidge is a custom extension (gamma bounds, fit_w_noise,
# predict_gradient), not sklearn.kernel_ridge.KernelRidge.
krr = KernelRidge(kernel='rbf_periodic', gamma=gamma, alpha=1.0e-1, gammaL=0.1*gamma, gammaU=10*gamma, max_lhood=False)
X = 12*sp.random.random_sample(210) - 1
X.sort()
y = fun(X) + sp.random.normal(scale=0.1, size=len(X))
X = sp.atleast_2d(X).T
krr.fit_w_noise(X, y)
Xtest = sp.atleast_2d(sp.linspace(X.min(), X.max(), 200)).T

y_pred, MSE = krr.predict(Xtest, MSE=True)
y_smooth = krr.predict(X).ravel()

yprime_ = krr.predict_gradient(Xtest).ravel()

print("noise = %.3e, lengthscale = %.3e" % (krr.noise.mean(), 1/(2 * krr.gamma)**0.5))

plt.clf()
plt.close()
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(24, 16))

ax0.plot(Xtest, fun(Xtest), 'g--')
ax0.scatter(X, y, c='r', marker = '+', alpha = .5)
ax0.scatter(Xtest, y_pred, c='b', marker = 'o', alpha = .5)
# ax0.fill(sp.concatenate([X, X[::-1]]),
#          sp.concatenate([y_smooth - 1.9600 * krr.noise,
Code example #42
tokenizer.fit_on_texts(train_texts)
train_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( train_texts ) , maxlen=max_sent_len )
test_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( test_texts ) , maxlen=max_sent_len )
train_matrix = tokenizer.texts_to_matrix( train_texts )
test_matrix = tokenizer.texts_to_matrix( test_texts )
embedding_weights = np.zeros( ( max_features , embeddings_dim ) )
for word,index in tokenizer.word_index.items():
  if index < max_features:
    try: embedding_weights[index,:] = embeddings[word]
    except: embedding_weights[index,:] = np.random.rand( 1 , embeddings_dim )

print ("")
print ("Method = Linear ridge regression with bag-of-words features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix , train_labels )
results = model.predict( test_matrix )
if not(is_geocoding): 
  print ("RMSE = " + repr( np.sqrt(mean_squared_error( test_labels , results )) ) )
  print ("MAE = " + repr( mean_absolute_error( test_labels , results ) ) )
else: 
  print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )
  print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )

print ("")
print ("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
Code example #43
#from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
print(y)
print()
X = np.random.randn(n_samples, n_features)
print(X)
#clf = SVR(C=1.0, epsilon=0.2)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)

print(y[1])
print(clf.predict(X[1].reshape(1, -1)))  # predict expects a 2D array
Code example #44
# X, Y, index, model and rmse_list are assumed to be defined earlier in the original script.
Overall_Y_Pred = np.zeros(len(X))
for i in [t+1 for t in list(range(4))]:
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    #index_train starts with the first folder
    index_train = index[folder_train];
    index_test = [element for i, element in enumerate(index) if i not in to_exclude]
    print (len(index_test))
    #train set starts with the first folder
    X_train = X[np.hstack(index_train)]
    Y_train = Y[np.hstack(index_train)]
    X_test = X[np.hstack(index_test)]
    Y_test = Y[np.hstack(index_test)]
    # train on training sets
    model.fit(X_train, Y_train)
    Y_test_Pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(Y_test, Y_test_Pred))
    rmse_list.append(rmse)

print (rmse_list)

#Plot:
y = np.asarray(rmse_list)
x = np.asarray([t+1 for t in list(range(4))])
plt.plot(x, y, x, y, 'rs')
plt.title('Number of Folders in Training Set vs. rmse of Test Set')
plt.xlabel('Number of Folders in Training Set')
plt.ylabel('Overall RMSE of Test Set')
plt.grid(True)
plt.show()
Code example #45
	#####################################################################

	# For each parameter trial
	for i in range(trials):

		# For regression use the Kernel Ridge method
		if model_type == "regression":

			print("\n Starting experiment for trial %d and parameter alpha = %3f\n " % (i, alpha_grid[i]))

			# Fit the kernel ridge model
			KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])
			KR.fit(K_train, y_train)

			# predict on the validation and test set
			y_pred = KR.predict(K_val)
			y_pred_test = KR.predict(K_test)

			# adjust prediction: needed because the training targets have been normalized
			y_pred = y_pred * float(y_train_std) + y_train_mean
			y_pred_test = y_pred_test * float(y_train_std) + y_train_mean

			# root mean squared error on validation
			rmse = np.sqrt(mean_squared_error(y_val, y_pred))
			perf_all_val.append(rmse)

			# root mean squared error on test
			rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
			perf_all_test.append(rmse_test)

			print("The performance on the validation set is: %3f" % rmse)
Code example #46
data2 = [ ( [ float(row[i]) for i in range(len(row) - 2) ] , ( float( row[ len(row) - 2 ] ) , float( row[ len(row) - 1 ] ) ) ) for row in csv.reader( open("default_plus_chromatic_features_1059_tracks.txt"), delimiter=',', quoting=csv.QUOTE_NONE) ]
np.random.seed(0)
np.random.shuffle( data2 )
train_size2 = int(len(data2) * percent)
train_matrix2 = np.array( [ features for ( features, label ) in data2[0:train_size2] ] )
test_matrix2 = np.array( [ features for ( features, label ) in data2[train_size2:-1] ] )
train_labels2 = [ label for ( features , label ) in data2[0:train_size2] ]
test_labels2 = [ label for ( features , label ) in data2[train_size2:-1] ]
train_matrix2 = preprocessing.scale( train_matrix2 )
test_matrix2 = preprocessing.scale( test_matrix2 )

print ("")
print ("Method = Linear ridge regression - Default features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Method = Linear ridge regression - Default features + chromatic features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix2 , train_labels2 )
results = model.predict( test_matrix2 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )

print ("")
print ("Method = Random forest regression - Default features")
model = RandomForestRegressor( n_estimators=100 , random_state=0 )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
Code example #47
File: ridge.py Project: rahlk/Bellwether
class RidgeMKL:
    """A MKL model in a transductive setting (test points are presented at training time).

    """

    mkls = {
        "align": Align,
        "alignf": Alignf,
        "alignfc": Alignf,
        "uniform": UniformAlignment,
    }

    mkls_low_rank = {
        "align": AlignLowRank,
        "alignf": AlignfLowRank,
        "alignfc": AlignfLowRank,
        "uniform": UniformAlignmentLowRank,
    }

    #  alignf expects kernels to be centered
    centered   = {"alignf", "alignfc"}
    supervised = {"align", "alignf", "alignfc"}

    def __init__(self, lbd=0, method="align", method_init_args={}, low_rank=False):
        """
        :param method: (``string``) "align", "alignf", or "uniform", MKL method to be used.

        :param low_rank: (``bool``) Use low-rank approximations.

        :param method_init_args: (``dict``) Initialization arguments for the MKL methods.

        :param lbd: (``float``) L2-regularization.
        """

        self.method  = method
        if not low_rank:
            self.mkl_model  = self.mkls[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model  = self.mkls[method](**init_args)
        else:
            self.mkl_model  = self.mkls_low_rank[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model  = self.mkls_low_rank[method](**init_args)
        self.lbd        = lbd
        self.low_rank   = low_rank
        self.trained    = False


    def fit(self, Ks, y, holdout=None):
        """Learn weights for kernel matrices or Kinterfaces.

        :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

        :param y: (``numpy.ndarray``) Class labels :math:`y_i \in {-1, 1}` or regression targets.

        :param holdout: (``list``) List of indices to exclude from alignment.
        """

        # Expand kernel interfaces to kernel matrices
        expand = lambda K: K[:, :] if isinstance(K, Kinterface) else K
        Hs     = list(map(expand, Ks))

        # Assert correct dimensions
        assert Ks[0].shape[0] == len(y)

        # Fit MKL model
        if self.method in self.supervised:
            self.mkl_model.fit(Hs, y, holdout=holdout)
        else:
            self.mkl_model.fit(Hs)

        if self.low_rank:
            self.X = hstack(map(lambda e: sqrt(e[0]) * e[1],
                                zip(self.mkl_model.mu, Hs)))

            if self.method in self.centered:
                self.X = center_kernel_low_rank(self.X)
                self.X[where(isnan(self.X))] = 0

            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel="linear", )

            # Fit ridge on the examples minus the holdout set
            inxs = list(set(range(Hs[0].shape[0])) - set(holdout or []))
            self.ridge.fit(self.X[inxs], y[inxs])
            self.trained = True

        else:
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel=self.mkl_model, )

            # Fit ridge on the examples minus the holdout set
            inxs = array(list(set(range(Hs[0].shape[0])) - set(holdout or [])))
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            self.ridge.fit(inxs, y[inxs])
            self.trained = True


    def predict(self, inxs):
        """
        Predict values for data on indices inxs (transcductive setting).

        :param inxs: (``list``) Indices of samples to be used for prediction.

        :return: (``numpy.ndarray``) Vector of prediction of regression targets.
        """
        assert self.trained

        if self.low_rank:
            return self.ridge.predict(self.X[inxs])
        else:
            inxs = array(inxs)
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            return self.ridge.predict(inxs).ravel()
Code example #48
############## Prediction and save to file ####################################
import os

try:
    os.remove("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc")
except OSError:
    pass
ncfile2 = Dataset("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc", "w")

ncfile1 = Dataset("/data/ISOTROPIC/data/data_downsampled4.nc", "r")

# create the dimensions
ncfile2.createDimension("Nt", Nt)
ncfile2.createDimension("Nz", Nh)
ncfile2.createDimension("Ny", Nh)
ncfile2.createDimension("Nx", Nh)
# create the var and its attribute
var = ncfile2.createVariable("Urec", "d", ("Nt", "Nz", "Ny", "Nx"))

for t in range(Nt):
    print("3D snapshot:", t)
    for i in range(Nh):
        xl = np.array(ncfile1.variables["velocity_x"][t, 0:Nh:sspacing, 0:Nh:sspacing, i])  # load only LR
        xl = np.divide(np.reshape(xl, (1, Nl * Nl)) - mea_l, sig_l)  # pre-normalize
        xrec = np.multiply(kr.predict(xl), sig_h) + mea_h  # re-normalize the prediction
        var[t, :, :, i] = np.reshape(xrec, (Nh, Nh))  # put to netcdf file

# Close file
ncfile1.close()
ncfile2.close()
Code example #49
embeddings = Word2Vec.load_word2vec_format( "GoogleNews-vectors-negative300.bin.gz" , binary=True )
train_matrix = [ ]
train_labels = [ ]
for word,scores in affective.items():
  try:
    train_matrix.append( embeddings[word] )
    train_labels.append( scores )
  except: continue
model = KernelRidge( kernel='poly' , degree=4 )
model.fit( train_matrix , train_labels )
textdata = " ".join( open(sys.argv[1] + ".revised.txt",'r').readlines( ) )
tokenizer = Tokenizer(nb_words=max_words, filters=keras.preprocessing.text.base_filter(), lower=True, split=" ")
tokenizer.fit_on_texts( textdata )
for word, index in tokenizer.word_index.items():
  try:
    if word not in affective: affective[word] = np.array( model.predict( np.array( embeddings[word] ).reshape(1, -1) )[0] )
  except: affective[word] = np.array( [ 5.0 , 5.0 , 5.0 ] )

# Process the textual contents
textdata = ""
file1 = open(sys.argv[1] + ".revised.txt",'r')
with file1 as myfile: textdata = re.sub( ">", "&gt;" , re.sub("<" , "&lt;" , re.sub( "&" , "&amp;" , re.sub( "   +", "\n\n" , re.sub( "\t" , " ", re.sub( "\r" , "" ,  "".join( myfile.readlines() ) ) ) ) ) ) )
corenlp = StanfordCoreNLP( )
file2 = open(sys.argv[1] + ".annotated.tsv",'w')
file3 = open(sys.argv[1] + ".annotated.xml",'w')
print("PARAGRAPH NUMBER\tENTITY TYPE\tENTITY\tCO-OCCURRING NOUNS\tCO-OCCURRING ADJECTIVES\tCO-OCCURRING VERBS\tVALENCE\tAROUSAL\tDOMINANCE\tSENTENCE", file=file2)
print("<document name='" + sys.argv[1] + "'>", file=file3)
parnum = 0
sys.stdout.write("Processing text...")
try:
  for paragraph in re.split("\n\n", textdata):
Code example #50
File: embedding.py Project: capoe/soapxx
def parametrize_environment_specific(settings, rerun):
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl
    # PATHS - for example:
    # { "xyz_file": "data_esol/structures.xyz",
    #   "soap_file": "data_esol/structures.soap",
    #   "kmat_file": "data_esol/kernel.npy",
    #   "targets_file": "data_esol/targets.npy",
    #   "range_file": "data_esol/range.json",
    #   "weights_file": "data_esol/weights.npy" }
    paths = copy.deepcopy(settings["paths"])
    for p,v in paths.items():
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])
    # SOAP
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])
    # KERNEL
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])
    # TARGETS
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])
    # MODEL
    regr_options = settings["regression_options"]
    if rerun or not os.path.isfile(paths["weights_file"]):
        log << "Make target: %s" % paths["weights_file"] << log.endl
        y_avg = np.average(targets)
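        # kernel='precomputed': fit/predict consume rows of the Gram matrix,
        # here the element-wise power K**xi rather than raw features.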
        krr = KernelRidge(
            alpha=regr_options["lreg"],
            kernel='precomputed')
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])
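    # ATTRIBUTION RANGES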
    if rerun or not os.path.isfile(paths["range_file"]):
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights, regr_options["xi"])
        json.dump(delta_Ys, open(paths["range_file"], "w"))
    else:
        delta_Ys = json.load(open(paths["range_file"]))
Code Example #51
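# Incremental-fold validation: train on the first i folders, then report the
# RMSE on every held-out folder (model, X, Y, index, rmse_list are defined
# earlier in the script; this snippet is an excerpt).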
for i in range(1, 5):
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    # training indices: the first i folders
    index_train = index[folder_train]
    index_test = [element for k, element in enumerate(index) if k not in to_exclude]
    # stack the selected folders into one training set
    X_train = X[np.hstack(index_train)]
    Y_train = Y[np.hstack(index_train)]
    model.fit(X_train, Y_train)
    rmse_folder = []
    for item in index_test:
        folder_X = X[item]
        folder_Y = Y[item]
        # evaluate on the held-out folder
        Y_test_Pred = model.predict(folder_X)
        rmse = np.sqrt(mean_squared_error(folder_Y, Y_test_Pred))
        rmse_folder.append(rmse)
    print(rmse_folder)
    rmse_list.append(rmse_folder)



print(rmse_list)
print("\n")


#Plot
sub = [221, 222, 223, 224]
for i in list(range(4)):
    y = np.asarray(rmse_list[i])
Code Example #52
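                # Either tune (alpha, RBF length scale) with a 5-fold grid search
                # or run with fixed parameters (first entries of alphaVec/sigmaVec).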
                param_grid = {"alpha": alphaVec, "kernel": [RBF(length_scale) for length_scale in sigmaVec]}
                kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
            else:
                # Run with pre-defined parameter set
                kr = KernelRidge(alpha=alphaVec[0], kernel='rbf', gamma=sigmaVec[0])
            
            # Fit model
            kr.fit(predictor.reshape(-1,1), predictand.reshape(-1,1))
            
            # Get best parameters (only defined on the GridSearchCV branch)
            bestAlpha_kr = kr.best_params_['alpha']
            bestSigma_kr = kr.best_params_['kernel'].length_scale

            # Predict over grid
            kr_fit = kr.predict(predictor_grid.reshape(-1,1))
            
            # Compute derivatives of prediction
            kr_der1 = np.gradient(kr_fit[:,0])
            kr_der2 = np.gradient(kr_der1)
            
            # Estimate decorrelation time KR
            if bestSigma_kr >= 2:
                minDer1 = 0.005 #0.001
            else:
                minDer1 = 0.0
                
            minNormSpread = 0.75*np.nanmedian(dt.from_dB(predictand)[dt.from_dB(predictor)+dB_shift_hr >= maxLeadTimeHours/2])
            print('Minimum spread to reach:', minNormSpread)
            minNormSpread_dB = dt.to_dB(minNormSpread)
Code Example #53
# MSE for SGD 292.104437304
# R2 for SGD 0.954873464267


#### Develop models using the algorithms tuned above
lr = LinearRegression()
lr.fit(x_train, y_train)
y_predicted = lr.predict(x_test)

svr = SVR(C=10, gamma=1, kernel='linear')
svr.fit(x_train_scaled, y_train)
y2 = svr.predict(x_test_scaled)

kr = KernelRidge(alpha=0.0001, coef0=1, degree=1, gamma=0.001, kernel='rbf', kernel_params=None)
kr.fit(x_train_scaled, y_train)
y3 = kr.predict(x_test_scaled)

lasso = Lasso(alpha=1e-09)
lasso.fit(x_train_scaled, y_train)
y4 = lasso.predict(x_test_scaled)

linear_ridge = Ridge(alpha=0.1)
linear_ridge.fit(x_train_scaled,y_train)
y5 = linear_ridge.predict(x_test_scaled)

bayesian_ridge = BayesianRidge(alpha_1=1e-05, alpha_2=10, lambda_1=10, lambda_2=1e-05)
bayesian_ridge.fit(x_train_scaled, y_train)
y6 = bayesian_ridge.predict(x_test_scaled)

sgd = SGDRegressor(alpha=0.1, epsilon=0.001, l1_ratio=0.2, loss='squared_loss', penalty='none', power_t=0.2)
sgd.fit(x_train_scaled, y_train)
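# A small hedged addition (not in the original): score every fitted model with a
# common MSE/R2 loop; assumes y_test and sklearn.metrics' mean_squared_error
# and r2_score are already in scope.
y7 = sgd.predict(x_test_scaled)
predictions = [("LinearRegression", y_predicted), ("SVR", y2), ("KernelRidge", y3),
               ("Lasso", y4), ("Ridge", y5), ("BayesianRidge", y6), ("SGDRegressor", y7)]
for name, pred in predictions:
    print("%s: MSE=%.3f, R2=%.3f" % (name, mean_squared_error(y_test, pred), r2_score(y_test, pred)))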
Code Example #54
File: 999.py Project: memoiry/2016-
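# Compare SVR and kernel ridge with the same RBF kernel, timing fit and
# prediction separately (svr, X, y, X_plot, gamma1 and alpha are defined
# earlier in the script; this snippet is an excerpt).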
kr = KernelRidge(kernel='rbf', gamma=gamma1, alpha=alpha)

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0

t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0
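
# A small addition (not in the original): report the measured wall-clock times.
print("SVR: fit %.3fs, predict %.3fs" % (svr_fit, svr_predict))
print("KRR: fit %.3fs, predict %.3fs" % (kr_fit, kr_predict))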

xk = np.arange(18630+1440)[:,None]
#############################################################################
# look at the results
err1 = np.abs(svr.predict(X)-z)/z
err2 = np.abs(kr.predict(X)-z)/z
x1 = X.flatten()
x1 = pd.DataFrame({'x':x1,'svr error %':err1,'kr error %':err2})
x2 = pd.DataFrame({'svr predict':y_svr,'kr predict': y_kr})
x2.to_excel('/users/xuguodong/desktop/data1/solution results.xls')
x1 = pd.melt(x1, id_vars=["x"], var_name="condition")
sns.lmplot(data=x1, x='x', y='value', hue='condition', ci=None, scatter_kws={"s": 80}, lowess=True)
sv_ind = svr.support_
Code Example #55
File: qm7_atom.py Project: crcollins/molml
from molml.atom import LocalEncodedBond  # assumed module path for LocalEncodedBond
from molml.kernel import AtomKernel
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_absolute_error as MAE

from utils import load_qm7


if __name__ == "__main__":
    # This is just boiler plate code to load the data
    Xin_train, Xin_test, y_train, y_test = load_qm7()

    # Look at just a few examples to be quick
    n_train = 200
    n_test = 200
    Xin_train = Xin_train[:n_train]
    y_train = y_train[:n_train]
    Xin_test = Xin_test[:n_test]
    y_test = y_test[:n_test]

    gamma = 1e-7
    alpha = 1e-7
    kern = AtomKernel(gamma=gamma, transformer=LocalEncodedBond(n_jobs=-1),
                      n_jobs=-1)
    K_train = kern.fit_transform(Xin_train)
    K_test = kern.transform(Xin_test)
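    # K_train is the train-train Gram matrix; K_test holds test-vs-train
    # similarities, as required by KernelRidge(kernel="precomputed").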

    clf = KernelRidge(alpha=alpha, kernel="precomputed")
    clf.fit(K_train, y_train)
    train_error = MAE(clf.predict(K_train), y_train)
    test_error = MAE(clf.predict(K_test), y_test)
    print("Train MAE: %.4f Test MAE: %.4f" % (train_error, test_error))
    print()
Code Example #56
File: 16S_smooth.py Project: linsalrob/EdwardsLab
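# Smooth two position/value series with RBF kernel ridge fits before plotting
# (excerpt: x, y and the open handle fin are read earlier in the script).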
    for l in fin:
        p = l.strip().split("\t")
        px.append(float(p[0]))
        py.append(float(p[1]))

ny = np.array(y)
nx = np.array(x)
pnx = np.array(px)
pny = np.array(py)


kr = KernelRidge(kernel='rbf', gamma=7.5e-5, alpha=0.001)
kr.fit(nx[:, None], ny[:, None])

x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = kr.predict(x_pred)


kr.fit(pnx[:, None], pny[:, None])
px_pred = np.linspace(min(px), max(px), 10000)[:, None]
py_pred = kr.predict(px_pred)

fig = plt.figure()
ax = fig.add_subplot(111)


"""
These regions come from http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2562909/
v1: 66-99
v2: 137-242
v3: 433-497