Example #1
def kernel_ridge_pre(X_train, y_train, X_pre, val):
    kernel = KernelRidge(kernel=val['kernel'],
                         alpha=val['alpha'],
                         gamma=val['gamma'])
    kernel.fit(X_train, y_train)
    y_pre = kernel.predict(X_pre)
    return y_pre
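# A minimal usage sketch for the helper above (synthetic data; the `val` dict
# keys mirror what the function reads):
import numpy as np
from sklearn.kernel_ridge import KernelRidge

X_train = np.random.rand(50, 3)
y_train = np.random.rand(50)
X_pre = np.random.rand(10, 3)
val = {'kernel': 'rbf', 'alpha': 1.0, 'gamma': 0.1}
y_pre = kernel_ridge_pre(X_train, y_train, X_pre, val)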
Example #2
class KernelRidgeImpl():
    def __init__(self,
                 alpha=1,
                 kernel='linear',
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None):
        self._hyperparams = {
            'alpha': alpha,
            'kernel': kernel,
            'gamma': gamma,
            'degree': degree,
            'coef0': coef0,
            'kernel_params': kernel_params
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
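# Usage sketch, assuming SKLModel aliases sklearn's KernelRidge (as the
# wrapper above implies):
import numpy as np
from sklearn.kernel_ridge import KernelRidge as SKLModel

X = np.random.rand(20, 4)
y = np.random.rand(20)
model = KernelRidgeImpl(alpha=0.5, kernel='rbf', gamma=0.2).fit(X, y)
preds = model.predict(X)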
Example #3
def mytrainingaux(X, Y, par):

    #reg=neighbors.KNeighborsRegressor(n_neighbors=par)
    reg = KernelRidge(kernel='rbf', gamma=par[0], alpha=par[1])
    reg.fit(X, Y)

    return reg
Example #4
def fit_krr(apar, gpar, nevt):

    # retrieve training data
    X = root2array('../../svm/no_truecc_cut_stride2_offset0.root',
                   branches='recotrklenact',
                   selection='mustopz<1275&&isnumucc==1',
                   stop=nevt).reshape(-1, 1)
    y = root2array('../../svm/no_truecc_cut_stride2_offset0.root',
                   branches='trueemu',
                   selection='mustopz<1275&&isnumucc==1',
                   stop=nevt)

    # rescale the regressors and save it
    os.system('mkdir -p models')
    scaler = preprocessing.StandardScaler().fit(X)
    scalerpn = 'models/regressor_scaler_active_a{}g{}nevt{}.pkl'.format(
        apar, gpar, nevt)
    joblib.dump(scaler, scalerpn)

    # fit the model
    krr = KernelRidge(kernel='rbf', alpha=float(apar), gamma=float(gpar))
    Xnorm = scaler.transform(X)
    krr.fit(Xnorm, y)

    # save the model
    modelpn = 'models/muon_energy_estimator_active_a{}g{}nevt{}.pkl'.format(
        apar, gpar, nevt)
    joblib.dump(krr, modelpn)
Example #5
    def test_regressor_modifications(self):
        regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1)
        kpcovr = self.model(mixing=0.5,
                            regressor=regressor,
                            kernel="rbf",
                            gamma=0.1)

        # KPCovR regressor matches the original
        self.assertTrue(
            regressor.get_params() == kpcovr.regressor.get_params())

        # KPCovR regressor updates its parameters
        # to match the original regressor
        regressor.set_params(gamma=0.2)
        self.assertTrue(
            regressor.get_params() == kpcovr.regressor.get_params())

        # Fitting regressor outside KPCovR fits the KPCovR regressor
        regressor.fit(self.X, self.Y)
        self.assertTrue(hasattr(kpcovr.regressor, "dual_coef_"))

        # Raise error during KPCovR fit since regressor and KPCovR
        # kernel parameters now inconsistent
        with self.assertRaises(ValueError) as cm:
            kpcovr.fit(self.X, self.Y)
        self.assertEqual(
            str(cm.exception),
            "Kernel parameter mismatch: the regressor has kernel parameters "
            "{kernel: linear, gamma: 0.2, degree: 3, coef0: 1, kernel_params: None}"
            " and KernelPCovR was initialized with kernel parameters "
            "{kernel: linear, gamma: 0.1, degree: 3, coef0: 1, kernel_params: None}",
        )
Example #6
class KRR_calibration:
    def __init__(self):
        self.model = 'KRR'

    def fit(self, X, p, Y, kernel_function='rbf', **kwargs):

        from sklearn.kernel_ridge import KernelRidge

        check_attributes(X, Y)

        self.model = KernelRidge(kernel=kernel_function, **kwargs)

        observed_bias = Y - p

        self.model.fit(X, observed_bias)

        return self.model

    def predict(self, X, p=None, mode='prob'):

        if mode == 'bias':
            return self.model.predict(X)
        elif mode == 'prob':
            return self.model.predict(X) + p.flatten()
        else:
            raise ValueError("Mode %s is not defined." % mode)
Example #7
def KernelRIDGE(X_train, X_dev, y_train, y_dev):
    KERNEL = 'polynomial'
    DEGREE = 2
    # Earlier sweeps, progressively narrowed; only the last assignment is used:
    # ALPHA = [0.00001, 0.00003, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1,
    #          0.3, 1, 3, 10, 30, 100, 300, 1000]
    # ALPHA = [0.03, 0.05, 0.1, 0.15, 0.3]
    # ALPHA = [0.05, 0.1, 0.2]
    ALPHA = [0.02]

    for hyper in ALPHA:
        KRR = KernelRidge(
            alpha=hyper,
            kernel=KERNEL,
            degree=DEGREE,
        )
        KRR.fit(X_train, y_train)
        ev.evaluate(KRR, 'KERNEL_RID', 'alpha', hyper, X_train, X_dev, y_train,
                    y_dev)
    print(" ")
    return KRR
Example #8
 def run(self, ind_sampling, ind_fold):
     if self.fold_setting=="S4":
         nb_fold = self.nb_fold * self.nb_fold
     self.load_CV_indexes(ind_sampling)
     
     if self.CV_type == 'ClusterCV_':
             ajout = self.CV_type
     else:
         ajout = 'CV_'
     
     K_train, K_test = self.make_Ktrain_and_Ktest_MT_with_settings(self.samples_tr[ind_fold], self.samples_te[ind_fold])
     pred_score = []
     for param in range(len(self.list_param)):
         if self.type_clf=="SVM":
             clf = svm.SVC(kernel='precomputed', C=self.list_param[param])
             clf.fit(K_train, self.labels_tr[ind_fold])
             Y_test_score = clf.decision_function(K_test).tolist()
         elif self.type_clf=="KernelRidge":
             clf = KernelRidge(alpha=self.list_param[param], kernel='precomputed')
             clf.fit(K_train, self.labels_tr[ind_fold])
             Y_test_score = clf.predict(K_test).tolist()
         else:
             raise ValueError('invalid value of type_clf')
         pred_score.append(Y_test_score)
         del clf
         del Y_test_score
     pickle.dump(pred_score, open('saved_results/MT/MT_'+str(self.nb_fold)+'fold'+ajout+self.fold_setting+"_"+self.type_clf+"_"+str(ind_fold)+"_"+str(ind_sampling)+".data", 'wb'))
     del K_train
     del K_test
Example #9
def ridgeReg(X, y):

    X_train, X_test, y_train, y_test = train_test_split(np.array(X)[:, 6:],
                                                        np.array(y),
                                                        test_size=0.20,
                                                        random_state=1)
    #print(X_test)
    regr = KernelRidge(alpha=10, kernel="polynomial", gamma=0.5)
    regr.fit(X_train, y_train)
    y_pred = regr.predict(X_test)

    index = 0
    for i in y_pred:
        #print("ypred = " + str(i) + " y test = " + str(y_test[index]))
        index = index + 1

    #print('Coefficients: \n', regr.coef_)
    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % r2_score(y_test, y_pred))

    #What were the real predictions?
    y_pred_train = regr.predict(X_train)
    print("Mean squared error on the training set: %.2f" %
          mean_squared_error(y_train, y_pred_train))
    print("Mean squared error on the test set:     %.2f" %
          mean_squared_error(y_test, y_pred))
    print("size of X = ", str(len(y)))
Example #10
def get_reconstruction_error(ct, data, nsplits=4, clf='kridge'):
    tasknames = [i.split('.')[0] for i in data.columns]
    tasks = list(set(tasknames))
    tasks.sort()
    chosen_vars = []
    #print(ct,tasks,tasknames)
    for i in ct:
        vars = [
            j for j in range(len(tasknames))
            if tasknames[j].split('.')[0] == tasks[i]
        ]
        chosen_vars += vars
    kf = KFold(n_splits=nsplits, shuffle=True)
    fulldata = data.values
    #subdata=data.ix[:,chosen_vars].values
    if clf == 'kridge':
        linreg = KernelRidge(alpha=1)
    elif clf == 'rf':
        linreg = RandomForestRegressor()
    else:
        linreg = LinearRegression()
    scaler = StandardScaler()
    pred = numpy.zeros(fulldata.shape)
    for train, test in kf.split(fulldata):
        #fulldata_train=fulldata[train,:]
        #fulldata_test=fulldata[test,:]
        # fit scaler to train data and apply to test
        fulldata_train = scaler.fit_transform(fulldata[train, :])
        fulldata_test = scaler.transform(fulldata[test, :])
        subdata_train = fulldata_train[:, chosen_vars]
        subdata_test = fulldata_test[:, chosen_vars]
        linreg.fit(subdata_train, fulldata_train)
        pred[test, :] = linreg.predict(subdata_test)
    cc = numpy.corrcoef(scaler.transform(fulldata).ravel(), pred.ravel())[0, 1]
    return cc
Example #11
def local_bias_estimator(X,
                         Y,
                         p,
                         X_grid,
                         model='KRR',
                         kernel_function='rbf',
                         **kwargs):

    check_attributes(X, Y)

    if model == 'KRR':
        from sklearn.kernel_ridge import KernelRidge
        model = KernelRidge(kernel=kernel_function, **kwargs)
        # kr = KernelRidge(alpha=alpha, kernel='rbf', **kwargs)
    elif model == 'SVR':
        from sklearn.svm import SVR
        model = SVR(kernel=kernel_function, **kwargs)
    elif model == 'EWF':
        K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs)
        p_err = Y - p
        bias = np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1)
        return bias
    else:
        raise ValueError("Model %s is not defined." % model)

    bias_calibration = Y - p

    model.fit(X, bias_calibration)
    bias = model.predict(X_grid)

    return bias
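# The 'EWF' branch above is a Nadaraya-Watson style kernel-weighted average of
# the residuals Y - p. A usage sketch (check_attributes and pairwise_kernels
# are assumed to be available in the module):
import numpy as np

X = np.random.rand(200, 1)
Y = np.random.rand(200)
p = np.random.rand(200)
X_grid = np.linspace(0, 1, 50).reshape(-1, 1)
bias_krr = local_bias_estimator(X, Y, p, X_grid, model='KRR', alpha=1.0)
bias_ewf = local_bias_estimator(X, Y, p, X_grid, model='EWF', gamma=5.0)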
Example #12
def mytraining(X, Y):

    #reg = svm.SVR(kernel='rbf',C=1000,gamma=0.1)
    reg = KernelRidge(alpha=0.001, coef0=1, degree=3, gamma=0.1, kernel='rbf')
    reg.fit(X, Y.ravel())

    return reg
Example #13
def choose_krr_gamma(train_x, test_x, train_y, test_y):
    gammas = [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    gamma_scores = []
    best_g_score = 0.0
    best_g = ""

    for g in gammas:
        krr = KernelRidge(kernel="laplacian", gamma=g)
        krr.fit(train_x, train_y)
        krr.predict(test_x)
        score = krr.score(test_x, test_y)
        if score > best_g_score:
            best_g_score = score
            best_g = g
        gamma_scores.append(score)

    print(gamma_scores)
    print("Best gamma: " + str(best_g))
    print("Score received: " + str(best_g_score))

    plt.plot(gammas, gamma_scores)
    plt.xlabel('Gamma')
    plt.ylabel('Score')
    plt.title('Tuning Gamma Hyperparameter for KRR')
    plt.show()
Example #14
def choose_krr_alpha(train_x, test_x, train_y, test_y):
    alphas = [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    alpha_scores = []
    best_a_score = 0.0
    best_a = ""

    for a in alphas:
        krr = KernelRidge(kernel="laplacian", alpha=a)
        krr.fit(train_x, train_y)
        krr.predict(test_x)
        score = krr.score(test_x, test_y)
        if score > best_a_score:
            best_a_score = score
            best_a = a
        alpha_scores.append(score)

    print(alpha_scores)
    print("Best alpha: " + str(best_a))
    print("Score received: " + str(best_a_score))

    plt.plot(alphas, alpha_scores)
    plt.xlabel('Alpha')
    plt.ylabel('Score')
    plt.title('Tuning Alpha Hyperparameter for KRR')
    plt.show()
Example #15
def choose_krr_kernel(train_x, test_x, train_y, test_y):
    kernels = ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid']
    kernel_scores = []
    best_k_score = 0.0
    best_k = ""

    for k in kernels:
        krr = KernelRidge(kernel=k)
        krr.fit(train_x, train_y)
        krr.predict(test_x)
        score = krr.score(test_x, test_y)
        if score > best_k_score:
            best_k_score = score
            best_k = k
        kernel_scores.append(score)

    print(kernel_scores)
    print("Best kernel: " + str(best_k))
    print("Score received: " + str(best_k_score))

    plt.bar(kernels, kernel_scores)
    plt.xlabel('Kernel')
    plt.ylabel('Score')
    plt.xticks(np.arange(len(kernels)), kernels)
    plt.title('Tuning Kernel Hyperparameter for KRR')
    plt.show()
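# The three sweeps above (kernel, alpha, gamma) can be folded into a single
# cross-validated search; a sketch using GridSearchCV, with train_x/train_y as
# in the functions above:
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV

param_grid = {
    'kernel': ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid'],
    'alpha': [0.01, 0.1, 0.25, 0.5, 1.0, 2.0],
    'gamma': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0],
}
search = GridSearchCV(KernelRidge(), param_grid, scoring='r2', cv=5)
search.fit(train_x, train_y)
print(search.best_params_, search.best_score_)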
Example #16
    def test_incompatible_coef_shape(self):

        # self.Y is 2D with two targets
        # Don't need to test X shape, since this should
        # be caught by sklearn's _validate_data
        regressor = KernelRidge(alpha=1e-8, kernel="linear")
        regressor.fit(self.X, self.Y[:, 0][:, np.newaxis])
        kpcovr = self.model(mixing=0.5, regressor=regressor)

        # Dimension mismatch
        with self.assertRaises(ValueError) as cm:
            kpcovr.fit(self.X, self.Y[:, 0])
        self.assertEqual(
            str(cm.exception),
            "The regressor coefficients have a dimension incompatible "
            "with the supplied target space. "
            "The coefficients have dimension %d and the targets "
            "have dimension %d" %
            (regressor.dual_coef_.ndim, self.Y[:, 0].ndim),
        )

        # Shape mismatch (number of targets)
        with self.assertRaises(ValueError) as cm:
            kpcovr.fit(self.X, self.Y)
        self.assertEqual(
            str(cm.exception),
            "The regressor coefficients have a shape incompatible "
            "with the supplied target space. "
            "The coefficients have shape %r and the targets "
            "have shape %r" % (regressor.dual_coef_.shape, self.Y.shape),
        )
Example #17
 def get_SVM_NTK(self, for_test: bool):
     if self.params['kernel_ridge']:
         clf = KernelRidge(alpha=self.params['ridge_coef'][0],
                           kernel="precomputed")
     else:
         clf = SVR(kernel="precomputed",
                   C=self.params['svm_coef'][0],
                   epsilon=self.params['svm_coef'][1],
                   cache_size=100000)
     output = []
     train = not for_test
     Ys_ = self.test_Ys_ if for_test else self.Ys_
     N = self.N_test if for_test else self.N_train
     for idx in range(N):
         NTK_train = self.get_ntk(fst_train=train,
                                  fst_idx=idx,
                                  fst_qry=False,
                                  snd_train=train,
                                  snd_idx=idx,
                                  snd_qry=False,
                                  ridge=True)
         NTK_test = self.get_ntk(fst_train=train,
                                 fst_idx=idx,
                                 fst_qry=True,
                                 snd_train=train,
                                 snd_idx=idx,
                                 snd_qry=False,
                                 ridge=False)
         y = Ys_[idx]
         time_evolution = self.time_evolution(NTK_train,
                                              self.params['inner_lr'])
         clf.fit(X=NTK_train, y=time_evolution @ y)
         pred = clf.predict(X=NTK_test)
         output.append(pred)
     return np.concatenate(output)
Example #18
def train_model(input_X_h5_loc, labels_y_h5_loc, model_loc, alpha, kernel, gamma, degree, coef0, save_model):
    """
    Trains a kernel ridge regression model

    See Scikit-learn documentation : http://scikit-learn.org/stable/modules/generated/sklearn.
                                            kernel_ridge.KernelRidge.html#sklearn.kernel_ridge.KernelRidge
    """

    total_time = time.time()

    # Loading inputs and targets
    input_X = np.array(h5py.File(input_X_h5_loc)[inputs_key])
    labels_y = np.array(h5py.File(labels_y_h5_loc)[targets_key]).reshape((-1,))

    # Creating model
    model = KernelRidge(degree=degree, coef0=coef0, kernel=kernel, gamma=gamma, alpha=alpha)

    # Model training
    model.fit(input_X, labels_y)

    # Saving the model if specified
    if save_model:
        os.makedirs(model_loc[:model_loc.rindex(os.path.sep)], exist_ok=True)
        joblib.dump(model, model_loc)

    print("--- %s seconds ---" % (time.time() - total_time))
Example #19
    def train_krrl_linear(self, data):
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion
        #print("El set de train tiene {} filas y {} columnas".format(x_tr.shape[0],x_tr.shape[1]))
        #print("El set de validacion tiene {} filas y {} columnas".format(x_val.shape[0],x_val.shape[1]))

        print('Start training KernelRidge with linear kernel...')
        start_time = self.timer()

        krrl = KernelRidge(alpha=1)
        krrl.fit(x_tr, y_tr)
        print("The R2 is: {}".format(krrl.score(x_tr, y_tr)))
        #		print("The alpha choose by CV is:{}".format(krrl.alpha_))
        self.timer(start_time)

        print("Making prediction on validation data")
        y_val = np.expm1(y_val)
        y_val_pred = np.expm1(krrl.predict(x_val))
        mae = mean_absolute_error(y_val, y_val_pred)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        try:
            os.mkdir('pickles')
        except:
            pass

        with open('pickles/krrlLinearK.pkl', 'wb') as f:
            pickle.dump(krrl, f)

        print('Making prediction and saving into a csv')
        y_test = krrl.predict(self.x_test)

        return y_test
Example #20
def RunKernel(XTrain, YTrain, XVal, YVal, XTest, YTest):
    print("Optimizing Kernel Ridge Regression Parameters")
    #BestAlpha, BestGamma = DoGridSearch(XTrain, YTrain.ravel())
    BestAlpha = 0.01
    BestGamma = 0.001
    KRR = KernelRidge(kernel='laplacian', gamma=BestGamma, alpha=BestAlpha)
    KRR.fit(XTrain, YTrain.ravel())

    YPredTrain = KRR.predict(XTrain)
    DiffYTrain = abs(YPredTrain - YTrain.ravel())
    print(sum(DiffYTrain) / float(len(DiffYTrain)))

    YPred = KRR.predict(XTest)
    DiffY = abs(YPred - YTest.ravel())
    MAEPredicted = sum(DiffY) / float(len(DiffY))
    print(BestAlpha, BestGamma)
    print(MAEPredicted)

    plt.scatter(YTest.tolist(), YPred.tolist(), c='red', s=5)
    plt.plot(np.linspace(0, 0.5, 2), np.linspace(0, 0.5, 2))
    plt.ylabel('Predicted Excitation Energy (a.u.)')
    plt.xlabel('True Excitation Energy (a.u.)')
    plt.title(
        'Kernel Ridge Regression (Laplacian) Learned Excitation Energies')
    plt.show()


#RunKernel()
Example #21
    def train_kernel_ridge_regression_clf(self,
                                          train_daylist,
                                          distinct,
                                          gamma=1,
                                          alpha=1):
        daytest = self.select_test_day(train_daylist)
        y_train = []
        X_train = []

        for day in daytest:
            for slice in range(144):
                dateslice = day + '-' + str(slice + 1)
                #feature,gap = self.generateFeatureLabel(dateslice,distinct)
                feature, gap = self.feature.generate(dateslice, distinct)

                if feature is not None:
                    if gap != 0:
                        gap = math.log10(float(gap))
                    else:
                        gap = -0.1
                    X_train.append(feature)
                    y_train.append(gap)
        clf = KernelRidge(kernel='polynomial', gamma=gamma, alpha=alpha)
        #clf = KernelRidge(kernel='polynomial', degree=3,alpha=0.01)
        clf.fit(X_train, y_train)

        return clf
Example #22
def prin(X, y, file, dic):
	t = 100
	#clf = MLPRegressor(solver=dic['solver'],activation=dic['activation'],hidden_layer_sizes=eval(dic['hls']), batch_size = dic['batch_size'], max_iter=dic['max_iter'])
	#clf = LinearRegression()
	clf = KernelRidge(alpha=0.001, kernel='laplacian', degree=18)
	X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=float(dic['test_size']))
	clf.fit(X_train, y_train)

	print('Training size', len(X_train))
	print('Testing size', len(X_test))
	#scores = cross_val_score(clf, X, y, cv=5)
	#print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

	accuracy = clf.score(X_train, y_train)
	print('accuracy', accuracy, '\n')
	print('RMSE', math.sqrt(metrics.mean_squared_error(y_test, clf.predict(X_test))))
	MAE = metrics.mean_absolute_error(y_test, clf.predict(X_test))
	print('MAE', MAE)
	#X_test,y_test=X[-t:],y[-t:]
	#file=file[-t:]
	pr = clf.predict(X_test)
	print('Filename                 Percentage Error         Actual Value      Predicted Value           Difference\n')
	for i in range(len(y_test)):
		if y_test[i] == 0.0:
			y_test[i] = 0.0000001
		predi = str(round(((pr[i] - y_test[i]) / y_test[i]) * 100, 2)) + ' %'
		print(file[i] + ' ' * (20 - len(file[i])), ' ' * (20 - len(predi)) + predi, ' ' * (20 - len(str(y_test[i]))) + str(y_test[i]), ' ' * (20 - len(str(round(pr[i], 2)))) + str(round(pr[i], 2)), ' ' * (20 - len(str(round((y_test[i] - pr[i]), 4)))) + str(round((y_test[i] - pr[i]), 4)))
	#print('Mean square Error', mean_squared_error(X, pr))
	#print('R2 score', r2_score(X, pr))
	#test(X,y,file,clf.coef_[0],clf.intercept_[0])
	#plot_g(clf)
	return MAE
Example #23
	def KRR_CV(self, trainX, testX, trainY, testY):
		kernel_vals = ['rbf', 'laplacian']
		kernel_indices = [0,1]
		inverse_gamma_vals = [1.0, 10.0, 20.0, 40.0, 80.0]
		alpha_vals = [0.0001, 0.001, 0.01, 0.1, 1.0]
		cv_errors = np.empty([len(kernel_vals)*len(inverse_gamma_vals)*len(alpha_vals), 4])
		i = 0
		for kern in kernel_vals:
			for g in inverse_gamma_vals:
				for a in alpha_vals:
					errors = np.empty([self.cv_split_no, 1])
					kf = KFold(n_splits=self.cv_split_no, random_state=30, shuffle=True)
					j = 0
					for train_indices, validation_indices in kf.split(trainX):
						training_set_X, validation_set_X = trainX[train_indices], trainX[validation_indices]
						training_set_Y, validation_set_Y = trainY[train_indices], trainY[validation_indices]
						regr = KernelRidge(alpha=a, gamma=1.0/g, kernel=kern)
						regr.fit(training_set_X, training_set_Y)
						predY = regr.predict(validation_set_X)
						errorY = np.absolute(predY - validation_set_Y)
						errors[j] = np.mean(errorY)
						j = j + 1
					cv_errors[i,:] = kernel_indices[kernel_vals.index(kern)], g, a, np.mean(errors)
					i = i + 1
		k_opt, g_opt, a_opt, _ = cv_errors[np.argmin(cv_errors[:, 3]), :]
		k_opt = kernel_vals[kernel_indices.index(k_opt)]
		regr = KernelRidge(alpha=a_opt, gamma=1.0/g_opt, kernel=k_opt)
		regr.fit(trainX, trainY)
		predY = regr.predict(testX)
		err_on_opt_params = np.absolute(predY - testY)                 
		return err_on_opt_params
Example #24
def train_select_regressor(X, y, param_grid, label, scalers_dict):
    # Select label
    y_selected = y[label].to_numpy()
    # Standardize y
    y_selected_std = scalers_dict[label].transform(y_selected.reshape(-1, 1))
    # Initialize regressor
    if (grid_search):
        # Instantiate model
        kern_regr = KernelRidge(kernel="rbf")
        # Initialize Grid Search
        reg = GridSearchCV(kern_regr,
                           param_grid,
                           verbose=3,
                           n_jobs=2,
                           scoring='r2')
        # Refit
        reg.fit(X, y_selected_std)
        # Return regressor wrapper
        return MedRegressorWrapper(label, reg.best_estimator_,
                                   reg.best_params_, reg.best_score_)
    else:
        # Instantiate model
        kern_regr = KernelRidge(kernel="rbf", alpha=1, gamma=0.01)
        # Fit
        kern_regr.fit(X, y_selected_std)
        # Return regressor
        return MedRegressorWrapper(label, kern_regr, None, -1)
Example #25
    def generate(self):
        neuroticismModelRightEye = KernelRidge(kernel='rbf',
                                               alpha=0.1,
                                               gamma=0.1)
        # neuroticismModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # neuroticismModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # neuroticismModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        #
        # extraversionModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # extraversionModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # extraversionModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # extraversionModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        #
        # conscientiousnessModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # conscientiousnessModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # conscientiousnessModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # conscientiousnessModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        #
        # agreeablenessModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # agreeablenessModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # agreeablenessModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # agreeablenessModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        #
        # opennessModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # opennessModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # opennessModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
        # opennessModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)

        neuroticismModelRightEye.fit(
            self.featuresDict['righteye'],
            self.labelsDict['righteye']['neuroticism'])
Example #26
def ridge_regression(K1, K2, y1, y2, alpha, c):
    n_val, n_train = K2.shape
    clf = KernelRidge(kernel="precomputed", alpha=alpha)
    one_hot_label = np.eye(c)[y1] - 1.0 / c
    clf.fit(K1, one_hot_label)
    z = clf.predict(K2).argmax(axis=1)
    return 1.0 * np.sum(z == y2) / n_val
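# Usage sketch with precomputed RBF Gram matrices (synthetic 3-class data):
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X1 = np.random.rand(100, 5)
y1 = np.random.randint(0, 3, 100)
X2 = np.random.rand(30, 5)
y2 = np.random.randint(0, 3, 30)
K1 = rbf_kernel(X1, X1)   # train x train
K2 = rbf_kernel(X2, X1)   # validation x train
acc = ridge_regression(K1, K2, y1, y2, alpha=1.0, c=3)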
Example #27
def reg_krr(X, y, kwargs_set):
    model = KernelRidge(alpha = kwargs_set['alfa'])
    if len(X.shape) == 1:
        model.fit(X.values.reshape(-1, 1), y)
    else:
        model.fit(X, y)
    return model
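# Usage sketch; the one-dimensional branch assumes X is a pandas Series,
# since it calls X.values:
import numpy as np
import pandas as pd

X = pd.Series(np.random.rand(50))
y = np.random.rand(50)
model = reg_krr(X, y, {'alfa': 0.5})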
Example #28
def choose_alpha_ridge(X, y, range_C, gammaX, plot_color):
    '''Implement 5 fold cv to determine the optimal C'''
    
    #Param setup
    kf = KFold(n_splits = 5)
    mean_error=[]; std_error=[];
    
    for C in range_C:
        #Params
        mse_temp = []
        #Model
        model = KernelRidge(alpha= 1.0/(2*C), kernel= 'rbf', gamma=gammaX)    
        
        #5 fold CV           
        for train, test in kf.split(X):
            #Model
            model.fit(X[train], y[train])
            ypred = model.predict(X[test])
            mse = mean_squared_error(y[test], ypred)
            mse_temp.append(mse)
            
        #Get mean & variance
        mean_error.append(np.array(mse_temp).mean())
        std_error.append(np.array(mse_temp).std())
        
    #Plot
    fig = plt.figure(figsize=(15,12))
    plt.errorbar(range_C, mean_error, yerr=std_error, color = plot_color)
    plt.xlabel('C')
    plt.ylabel('Mean square error')
    plt.title('Choice of C in kernelised Ridge Regression - 5 fold CV, gamma = {}'.format(gammaX))
    plt.show()
Example #29
    def fit(self,
            features,
            targets,
            cv=5,
            alpha=1e-8,
            scoring_criteria='neg_mean_absolute_error',
            threshold=1e-3):
        """
        Fit the dataset with kernel ridge regression.
        Args:
            features (np.array): features X.
            targets (np.array): targets y.
            cv (int): The number of folds in cross validation.
                Default to 5.
            alpha (float): Small positive number.
                Regularization parameter in KRR.
            scoring_criteria (str): The scoring strategy to evaluate the
                prediction on test sets. The same as the scoring
                parameter in sklearn.model_selection.GridSearchCV.
                Default to 'neg_mean_absolute_error', i.e. MAE.
            threshold (float): The convergence threshold for the final
                optimal sigma.
        Returns:
            (float) The optimized sigma.
        """
        st_gamma = -np.inf
        nd_gamma = np.inf
        gamma_trials = np.logspace(-6, 4, 11)
        while (abs(st_gamma - nd_gamma) > threshold):
            kr = GridSearchCV(KernelRidge(kernel='rbf', alpha=alpha,
                                          gamma=0.1),
                              cv=cv,
                              param_grid={"gamma": gamma_trials},
                              scoring=scoring_criteria,
                              return_train_score=True)
            kr.fit(features, targets)
            cv_results = pd.DataFrame(kr.cv_results_)
            st_gamma = cv_results['param_gamma'][cv_results['rank_test_score']
                                                 == 1].iloc[0]
            nd_gamma = cv_results['param_gamma'][cv_results['rank_test_score']
                                                 == 2].iloc[0]
            gamma_trials = np.linspace(min(st_gamma, nd_gamma),
                                       max(st_gamma, nd_gamma), 10)
        gamma = st_gamma

        K = np.exp(-gamma * squareform(pdist(features))**2)
        alphas = np.dot(np.linalg.inv(K + alpha * np.eye(len(features))),
                        targets)
        kkr = KernelRidge(alpha=alpha, gamma=gamma, kernel='rbf')
        kkr.fit(features, targets)
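        # Note: for this RBF kernel, kkr.dual_coef_ should match the
        # closed-form `alphas` above, since KernelRidge solves
        # (K + alpha*I) @ dual_coef = y with K = exp(-gamma * d**2).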

        self.param['n_train'] = len(features)
        self.param['lambda'] = alpha
        self.param['sigma'] = 1 / np.sqrt(2 * gamma)
        self.xU = features
        self.yU = targets
        self.predictor = kkr
        self.alphas = alphas

        return gamma
Example #30
def AlgoKRR(df_train, df_trainY):
    model = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
    rmsle_cv(model, df_train, df_trainY)
    model.fit(df_train, df_trainY)
    result = model.predict(df_train)
    print("rms value of same set: ",
          np.around(sqrt(mean_squared_error(df_trainY, result)), decimals=7))
    return model
Example #31
File: lgo.py, Project: aatapa/RLScore
def lgo_sklearn(X,y, groups, regparam):
    logo = LeaveOneGroupOut()
    errors = []
    for train, test in logo.split(X, y, groups=groups):
        rls = KernelRidge(kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        e = sqerror(y[test], p)       
        errors.append(e)
    return np.mean(errors)
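# Usage sketch (sqerror is assumed to be RLScore's squared-error measure;
# the groups here are synthetic):
import numpy as np

X = np.random.rand(60, 4)
y = np.random.rand(60)
groups = np.repeat(np.arange(6), 10)   # six groups of ten samples each
mean_err = lgo_sklearn(X, y, groups, regparam=1.0)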
Example #32
File: lpo.py, Project: aatapa/RLScore
def lpo_sklearn(X,y, regparam):
    lpo = LeavePOut(p=2)
    preda = []
    predb = []
    for train, test in lpo.split(X):
        rls = KernelRidge(kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        preda.append(p[0])
        predb.append(p[1])
    return preda, predb
Example #33
class VADEstimator(BaseEstimator):
  def fit( self, x , y , size=1 ):
    self.model = Sequential()
    self.model.add(Dense( int( embeddings_dim / 2.0 ) , input_dim=embeddings_dim , init='uniform' , activation='tanh'))
    self.model.add(Dense( int( embeddings_dim / 4.0 ) , init='uniform' , activation='tanh'))
    self.model.add(Dense(size , init='uniform' ) )
    self.model.compile(loss='mse', optimizer='rmsprop')
    self.model = KernelRidge( kernel='rbf' )
    self.model.fit( x , y )
  def predict( self, x ): 
    if isinstance( self.model , Sequential ): return self.model.predict( x , verbose=0 )[ 0 ]
    return self.model.predict( x )
Example #34
    def ANM_causation_score(self,train_size=0.5,independence_criterion='HSIC',metric='linear',regression_method='GP'):
        '''
            Measure how likely a given causal direction is true

            Parameters
            ----------
            train_size :
                Fraction of given data used to training phase

            independence_criterion :
                kruskal for Kruskal-Wallis H-test,
                HSIC for Hilbert-Schmidt Independence Criterion

            metric :
                linear, sigmoid, rbf, poly
                kernel function used to compute the Gram matrix for HSIC;
                a Gaussian kernel is used in:
                Nonlinear causal discovery with additive noise models,
                Patrik O. Hoyer et al.

            Returns
            -------
            causal_strength: A float between 0. and 1.
        '''
        Xtrain, Xtest , Ytrain, Ytest = train_test_split(self.X, self.Y, train_size = train_size)
        if regression_method == 'GP':
            _gp = pyGPs.GPR()      # specify model (GP regression)
            _gp.getPosterior(Xtrain, Ytrain) # fit default model (mean zero & rbf kernel) with data
            _gp.optimize(Xtrain, Ytrain)     # optimize hyperparameters (default optimizer: single run minimize)

            #Forward case
            #_gp = KernelRidge(kernel='sigmoid',degree=3)
            #_gp.fit(Xtrain,Ytrain)
            ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
            #_gp.plot()
            #errors_forward = _gp.predict(Xtest) - Ytest
            errors_forward = ym - Ytest
        else:
            _gp = KernelRidge(kernel='sigmoid')
            _gp.fit(Xtrain, Ytrain)
            errors_forward = _gp.predict(Xtest) - Ytest

        #Independence score

        forward_indep_pval = {
            'kruskal': kruskal(errors_forward,Xtest)[1],
            'HSIC': self.HilbertSchmidtNormIC(errors_forward,Xtest,metric=metric)[1]
        }[independence_criterion]


        return {'causal_strength':forward_indep_pval}
Example #35
def plot_kernel_ridge(X, y, gamma=0.5, alpha=0.1):
    # kernel (ridge) regression
    krr = KernelRidge(kernel="rbf", gamma=gamma, alpha=alpha)
    krr.fit(X, y)

    # predict over a dense grid spanning the data
    x_plot = np.linspace(X.min(), X.max(), 100)[:, np.newaxis]
    y_plot = krr.predict(x_plot)

    # plot
    plt.figure(figsize=(8, 4.8))
    plt.plot(X, y, 'or')
    plt.plot(x_plot, y_plot)
#     plt.title(r"Gaussian Kernel ($\gamma=%0.2f, \alpha=%0.2f$)" % (gamma,alpha), fontsize=16)
    plt.title(r"Gaussian Kernel ($\gamma=%0.2f$)" % (gamma), fontsize=16)
Example #36
def train_kernelRidgeModel(X, y, alpha=1, kernel="linear", gamma=None, degree=3, coef0=1, kernel_params=None):
    """
    Train a kernel ridge regression model
    """
    model = KernelRidge(
        alpha=alpha, kernel=kernel, gamma=gamma, degree=degree, coef0=coef0, kernel_params=kernel_params
    )
    model = model.fit(X, y)
    return model
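# Usage sketch (synthetic data):
import numpy as np

X = np.random.rand(30, 4)
y = np.random.rand(30)
model = train_kernelRidgeModel(X, y, alpha=0.5, kernel='rbf', gamma=0.1)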
Example #37
def modelfitOne(train_X, train_y, test_X, yd, ImageId, FeatureName):
    n_clf = 1
    # regressor
    clf = KernelRidge(kernel='rbf', gamma=6e-4, alpha=2e-2)
    # training
    print('----------------- training... ------------------')
    clf.fit(train_X, train_y)
    # prediction
    print('----------------- predicting... ------------------')
    pred = clf.predict(test_X)
    predicted = np.zeros(len(FeatureName))
    for i in range(len(FeatureName)):
        if i % 500 == 0:
            print('i =', i)
        else:
            pass
        imageID = ImageId[i]
        clfID = yd[FeatureName[i]]
        predicted[i] = pred[imageID, clfID]
    predicted = predicted*48.+48.
    return predicted
Example #38
num_folds = 5   # data is divided into 5 time slices
Overall_Y_Pred = np.zeros(len(X))
rmse_list = []
for i in [t+1 for t in list(range(4))]:
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    #index_train starts with the first folder
    index_train = index[folder_train]
    index_test = [element for i, element in enumerate(index) if i not in to_exclude]
    print (len(index_test))
    #train set starts with the first folder
    X_train = X[np.hstack(index_train)]
    Y_train = Y[np.hstack(index_train)]
    X_test = X[np.hstack(index_test)]
    Y_test = Y[np.hstack(index_test)]
    # train on training sets
    model.fit(X_train, Y_train)
    Y_test_Pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(Y_test, Y_test_Pred))
    rmse_list.append(rmse)

print (rmse_list)

#Plot:
y = np.asarray(rmse_list)
x = np.asarray([t+1 for t in list(range(4))])
plt.plot(x, y, x, y, 'rs')
plt.title('Number of Folds in Training Set vs. RMSE of Test Set')
plt.xlabel('Number of Folds in Training Set')
plt.ylabel('Overall RMSE of Test Set')
plt.grid(True)
plt.show()
Example #39
def main():
    
    T = 10.0 # Simulation temperature
    dt = 1 * units.fs # MD timestep
    nsteps = 500 # MD number of steps
    mixing = [1,-1,0] # [1.0, -1.0, 0.3] # mixing weights for "real" and ML forces
    lengthscale = 0.6 # KRR Gaussian width.
    gamma = 1 / (2 * lengthscale**2)
    grid_spacing = 0.05
    #     mlmodel = GaussianProcess(corr='squared_exponential', 
    #         # theta0=1e-1, thetaL=1e-4, thetaU=1e+2,
    #         theta0=1., 
    #         random_start=100, normalize=False, nugget=1.0e-2)
    mlmodel = KernelRidge(kernel='rbf', 
                          gamma=gamma, gammaL = gamma/4, gammaU=2*gamma,
                           alpha=5.0e-2, variable_noise=False, max_lhood=True)
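    # Note: gammaL, gammaU, variable_noise and max_lhood are not parameters of
    # sklearn's KernelRidge; this script appears to rely on a project-specific
    # KernelRidge subclass (it also reads mlmodel.y and mlmodel.noise below).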
    anglerange = sp.arange(0, 2*sp.pi + grid_spacing, grid_spacing)
    X_grid = sp.array([[sp.array([x,y]) for x in anglerange]
                       for y in anglerange]).reshape((len(anglerange)**2, 2))
    ext_field = IgnoranceField(X_grid, y_threshold=1.0e-1, cutoff = 3.)
                           
    # Bootstrap from initial database? uncomment
    data = sp.loadtxt('phi_psi_minener_coarse_1M_md.csv')
    data[:,:2] -= 0.025 # fix because of old round_vector routine
    mlmodel.fit(data[:,:2], data[:,2])
    ext_field.update_cost(mlmodel.X_fit_, mlmodel.y)
    
    # Prepare diagnostic visual effects.
    plt.close('all')
    plt.ion()
    fig, ax = plt.subplots(1, 2, figsize=(24, 13))
    
    atoms = ase.io.read('myplum.xyz')
    with open('data.input', 'r') as file:
        lammpsdata = file.readlines()

    # Set temperature
    MaxwellBoltzmannDistribution(atoms, 0.5 * units.kB * T, force_temp=True)
    # Set total momentum to zero
    p = atoms.get_momenta()
    p -= p.sum(axis=0) / len(atoms)
    atoms.set_momenta(p)
    atoms.rescale_velocities(T)
    
    # Select MD propagator
    mdpropagator = Langevin(atoms, dt, T*units.kB, 1.0e-2, fixcm=True)
    # mdpropagator = MLVerlet(atoms, dt, T)

    # Zero-timestep evaluation and data files setup.
    print("START")
    pot_energy, f = calc_lammps(atoms, preloaded_data=lammpsdata)
    mlmodel.accumulate_data(round_vector(atoms.colvars(), precision=grid_spacing), pot_energy)
    printenergy(atoms, pot_energy)
    try:
        os.remove('atomstraj.xyz')
    except:
        pass
    traj = open("atomstraj.xyz", 'a')
    atoms.write(traj, format='extxyz')
    results, traj_buffer = [], []

    # When in the simulation to update the ML fit -- optional.
    teaching_points = sp.unique((sp.linspace(0, nsteps**(1/3), nsteps//20)**3).astype('int') + 1)

    # MD Loop
    for istep in range(nsteps):
        
        print("Dihedral angles | phi = %.3f, psi = %.3f " % (atoms.phi(), atoms.psi()))
        do_update = False # (istep % 10 == 9) # (istep in teaching_points) or (istep - nsteps == 1) # istep % 20 == 0 #
        mdpropagator.halfstep_1of2(f)
        f, pot_energy, _ = get_all_forces(atoms, mlmodel, grid_spacing, 
                                          extfield=None, mixing=mixing, 
                                          lammpsdata=lammpsdata, do_update=do_update)
        mdpropagator.halfstep_2of2(f)

        # manual cooldown!!!
        if sp.absolute(atoms.get_kinetic_energy() / (1.5 * units.kB * atoms.get_number_of_atoms()) - T) > 50:
            atoms.rescale_velocities(T)

        printenergy(atoms, pot_energy/atoms.get_number_of_atoms(), step=istep)
        if do_update:
            try:
                print("Lengthscale = %.3e, Noise = %.3e" % (1/(2 * mlmodel.gamma)**0.5, mlmodel.noise.mean()))
            except:
                print("")
        if 'datasetplot' not in locals():
            datasetplot = pl.Plot_datapts(ax[0], mlmodel)
        else:
            datasetplot.update()
        if hasattr(mlmodel, 'dual_coef_'):
            if 'my2dplot' not in locals():
                my2dplot = pl.Plot_energy_n_point(ax[1], mlmodel, atoms.colvars().ravel())
            else:
                my2dplot.update_prediction()
                my2dplot.update_current_point(atoms.colvars().ravel())
        fig.canvas.draw()
        # fig.canvas.print_figure('current.png')
        traj_buffer.append(atoms.copy())
        if istep % 1 == 0:
            for at in traj_buffer:
                atoms.write(traj, format='extxyz')
            traj_buffer = []
        results.append(sp.array([atoms.phi(), atoms.psi(), pot_energy]))
    traj.close()
    print("FINISHED")
    sp.savetxt('results.csv', sp.array(results))
    sp.savetxt('mlmodel.dual_coef_.csv', mlmodel.dual_coef_)
    sp.savetxt('mlmodel.X_fit_.csv', mlmodel.X_fit_)
    sp.savetxt('mlmodel.y.csv', mlmodel.y)
    calc = None
    
    return mlmodel
Example #40
regr = linear_model.LinearRegression()
scores = cross_val_score(regr, data.df[inputVariables].values, data.df['count'].values)
print("Linear Regression cross validation score: ", scores.mean())
regr.fit(X_train_sum, y_train_sum)
print("Linear Regression training score: ", regr.score(X_train_sum, y_train_sum))
print("Linear Regression testing score: ", regr.score(X_test_sum, y_test_sum))



##### Kernel Ridge and Support Vector Regression
#####
## Finding the best parameters
alpha=[1,1e-1,1e-2,1e-3]
for a in alpha:
	kr = KernelRidge(kernel='rbf', alpha=a)
	kr.fit(X_train_sum, y_train_sum)
	print("Kernel Ridge train score: ", kr.score(X_train_sum, y_train_sum), " for alpha = %s" %a)
	print("Kernel Ridge test score: ", kr.score(X_test_sum, y_test_sum), " for alpha = %s" %a)


### Using GridSearchCV
param_grid = {
	'alpha': [1, 1e-1, 1e-2],
	"gamma": np.logspace(-2, 2, 5)
}
GSKernelRidge = GridSearchCV(KernelRidge(kernel='rbf'), param_grid=param_grid)
GSKernelRidge.fit(X_train_sum, y_train_sum)



Example #41
  affective[ row["Word"].lower() ] = np.array( [ float( row["V.Mean.Sum"] ) , float( row["A.Mean.Sum"] ) , float( row["D.Mean.Sum"] ) ] )

# Expand dictionary of affective words
embeddings_dim = 300
max_words = 100000
embeddings = dict( )
embeddings = Word2Vec.load_word2vec_format( "GoogleNews-vectors-negative300.bin.gz" , binary=True )
train_matrix = [ ]
train_labels = [ ]
for word,scores in affective.items():
  try:
    train_matrix.append( embeddings[word] )
    train_labels.append( scores )
  except: continue
model = KernelRidge( kernel='poly' , degree=4 )
model.fit( train_matrix , train_labels )
textdata = " ".join( open(sys.argv[1] + ".revised.txt",'r').readlines( ) )
tokenizer = Tokenizer(nb_words=max_words, filters=keras.preprocessing.text.base_filter(), lower=True, split=" ")
tokenizer.fit_on_texts( textdata )
for word, index in tokenizer.word_index.items():
  try:
    if word not in affective: affective[word] = np.array( model.predict( np.array( embeddings[word] ).reshape(1, -1) )[0] )
  except: affective[word] = np.array( [ 5.0 , 5.0 , 5.0 ] )

# Process the textual contents
textdata = "" 
file1 = open(sys.argv[1] + ".revised.txt",'r')
with file1 as myfile: textdata = re.sub( ">", "&gt;" , re.sub("<" , "&lt;" , re.sub( "&" , "&amp;" , re.sub( "   +", "\n\n" , re.sub( "\t" , " ", re.sub( "\r" , "" ,  "".join( myfile.readlines() ) ) ) ) ) ) )
corenlp = StanfordCoreNLP( )
file2 = open(sys.argv[1] + ".annotated.tsv",'w')
file3 = open(sys.argv[1] + ".annotated.xml",'w')
tokenizer = Tokenizer(nb_words=max_features, filters=keras.preprocessing.text.base_filter(), lower=True, split=" ")
tokenizer.fit_on_texts(train_texts)
train_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( train_texts ) , maxlen=max_sent_len )
test_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( test_texts ) , maxlen=max_sent_len )
train_matrix = tokenizer.texts_to_matrix( train_texts )
test_matrix = tokenizer.texts_to_matrix( test_texts )
embedding_weights = np.zeros( ( max_features , embeddings_dim ) )
for word,index in tokenizer.word_index.items():
  if index < max_features:
    try: embedding_weights[index,:] = embeddings[word]
    except: embedding_weights[index,:] = np.random.rand( 1 , embeddings_dim )

print ("")
print ("Method = Linear ridge regression with bag-of-words features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix , train_labels )
results = model.predict( test_matrix )
if not(is_geocoding): 
  print ("RMSE = " + repr( np.sqrt(mean_squared_error( test_labels , results )) ) )
  print ("MAE = " + repr( mean_absolute_error( test_labels , results ) ) )
else: 
  print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )
  print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )

print ("")
print ("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
#MAE for SGD: 9.04117895779
#MSE for SGD 292.104437304
#R2 for SGD 0.954873464267


####Develop models using various tuned algorithms above
lr = LinearRegression()
lr.fit(x_train, y_train)
y_predicted = lr.predict(x_test)

svr = SVR(C=10, gamma =1, kernel = 'linear')
svr.fit(x_train_scaled, y_train)
y2 = svr.predict(x_test_scaled)

kr = KernelRidge(alpha=0.0001, coef0=1, degree=1, gamma=0.001, kernel='rbf',kernel_params=None)
kr.fit(x_train_scaled, y_train)
y3 = kr.predict(x_test_scaled)

lasso = Lasso(alpha=1e-09)
lasso.fit(x_train_scaled, y_train)
y4 = lasso.predict(x_test_scaled)

linear_ridge = Ridge(alpha=0.1)
linear_ridge.fit(x_train_scaled,y_train)
y5 = linear_ridge.predict(x_test_scaled)

bayesian_ridge = BayesianRidge(alpha_1=1e-05, alpha_2=10, lambda_1=10, lambda_2=1e-05)
bayesian_ridge.fit(x_train_scaled, y_train)
y6 = bayesian_ridge.predict(x_test_scaled)

sgd = SGDRegressor(alpha=0.1, epsilon=0.001, l1_ratio=0.2, loss='squared_loss', penalty='none', power_t=0.2)
n_alphas = 50
alphas = np.logspace(-1, 8, n_alphas)
ridge = Ridge(fit_intercept=True)
kernel_ridge = KernelRidge(kernel='poly', gamma=1, degree=3, coef0=1)

test_scores_ridge = []
test_scores_kernel = []

for alpha in alphas:
    ridge.set_params(alpha=alpha)
    ridge.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(ridge, X_test_sc, y_test_sc)
    test_scores_ridge.append(test_mse)

    kernel_ridge.set_params(alpha=alpha)
    kernel_ridge.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(kernel_ridge, X_test_sc, y_test_sc)
    test_scores_kernel.append(test_mse)


poly = PolynomialNetworkRegressor(degree=3, n_components=2, tol=1e-3,
                                  warm_start=True, random_state=0)

test_scores_poly = []

for alpha in alphas:
    poly.set_params(beta=alpha)
    poly.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(poly, X_test_sc, y_test_sc)
    test_scores_poly.append(test_mse)
Example #45
File: 999.py, Project: memoiry/2016-
#############################################################################
# Fit regression model
train_size = 18630
C = 3e6
gamma = 0.01
svr = SVR(kernel='rbf', C=C, gamma=gamma)
alpha = 0.23
gamma1 = 0.01
kr = KernelRidge(kernel='rbf', gamma=gamma1, alpha=alpha)

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0

t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0

xk = np.arange(18630+1440)[:,None]
#############################################################################
# look at the results
err1 = np.abs(svr.predict(X)-z)/z
err2 = np.abs(kr.predict(X)-z)/z
            
Example #46
            #### KERNEL RIDGE REGRESSION
            alphaVec = [0.1, 0.01]
            sigmaVec = np.arange(5.0, 5.5, 0.5)
            
            if len(alphaVec) > 1 or len(sigmaVec) > 1:
                # Grid search of parameters
                param_grid = {"alpha": alphaVec, "kernel": [RBF(length_scale) for length_scale in sigmaVec]}
                kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
            else:
                # Run with pre-defined parameter set
                kr = KernelRidge(alpha=alphaVec[0], kernel='rbf', gamma=sigmaVec[0])
            
            # Fit model
            kr.fit(predictor.reshape(-1,1), predictand.reshape(-1,1))
            
            # Get best parameters
            bestAlpha_kr = kr.best_params_['alpha']
            bestSigma_kr = kr.best_params_['kernel'].length_scale

            # Predict over grid
            kr_fit = kr.predict(predictor_grid.reshape(-1,1))
            
            # Compute derivatives of prediction
            kr_der1 = np.gradient(kr_fit[:,0])
            kr_der2 = np.gradient(kr_der1)
            
            # Estimate decorrelation time KR
            if bestSigma_kr >= 2:
                minDer1 = 0.005 #0.001
Example #47
px = []
py = []
with open('/home/redwards/Desktop/genus_species_analysis/pseudo_coverage.txt', 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        px.append(float(p[0]))
        py.append(float(p[1]))

ny = np.array(y)
nx = np.array(x)
pnx = np.array(px)
pny = np.array(py)


kr = KernelRidge(kernel='rbf', gamma=7.5e-5, alpha=0.001)
kr.fit(nx[:, None], ny[:, None])

x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = kr.predict(x_pred)


kr.fit(pnx[:, None], pny[:, None])
px_pred = np.linspace(min(px), max(px), 10000)[:, None]
py_pred = kr.predict(px_pred)

fig = plt.figure()
ax = fig.add_subplot(111)


"""
These regions come from http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2562909/
Example #48
test_matrix1 = preprocessing.scale( test_matrix1 )
data2 = [ ( [ float(row[i]) for i in range(len(row) - 2) ] , ( float( row[ len(row) - 2 ] ) , float( row[ len(row) - 1 ] ) ) ) for row in csv.reader( open("default_plus_chromatic_features_1059_tracks.txt"), delimiter=',', quoting=csv.QUOTE_NONE) ]
np.random.seed(0)
np.random.shuffle( data2 )
train_size2 = int(len(data2) * percent)
train_matrix2 = np.array( [ features for ( features, label ) in data2[0:train_size2] ] )
test_matrix2 = np.array( [ features for ( features, label ) in data2[train_size2:-1] ] )
train_labels2 = [ label for ( features , label ) in data2[0:train_size2] ]
test_labels2 = [ label for ( features , label ) in data2[train_size2:-1] ]
train_matrix2 = preprocessing.scale( train_matrix2 )
test_matrix2 = preprocessing.scale( test_matrix2 )

print ("")
print ("Method = Linear ridge regression - Default features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Method = Linear ridge regression - Default features + chromatic features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix2 , train_labels2 )
results = model.predict( test_matrix2 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )

print ("")
print ("Method = Random forest regression - Default features")
model = RandomForestRegressor( n_estimators=100 , random_state=0 )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
Example #49
File: ridge.py, Project: rahlk/Bellwether
class RidgeMKL:
    """A MKL model in a transductive setting (test points are presented at training time).

    """

    mkls = {
        "align": Align,
        "alignf": Alignf,
        "alignfc": Alignf,
        "uniform": UniformAlignment,
    }

    mkls_low_rank = {
        "align": AlignLowRank,
        "alignf": AlignfLowRank,
        "alignfc": AlignfLowRank,
        "uniform": UniformAlignmentLowRank,
    }

    #  alignf expects kernels to be centered
    centered   = {"alignf", "alignfc"}
    supervised = {"align", "alignf", "alignfc"}

    def __init__(self, lbd=0, method="align", method_init_args={}, low_rank=False):
        """
        :param method: (``string``) "align", "alignf", or "uniform", MKL method to be used.

        :param low_rank: (``bool``) Use low-rank approximations.

        :param method_init_args: (``dict``) Initialization arguments for the MKL methods.

        :param lbd: (``float``) L2-regularization.
        """

        self.method  = method
        if not low_rank:
            self.mkl_model  = self.mkls[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model  = self.mkls[method](**init_args)
        else:
            self.mkl_model  = self.mkls_low_rank[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model  = self.mkls_low_rank[method](**init_args)
        self.lbd        = lbd
        self.low_rank   = low_rank
        self.trained    = False


    def fit(self, Ks, y, holdout=None):
        """Learn weights for kernel matrices or Kinterfaces.

        :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

        :param y: (``numpy.ndarray``) Class labels :math:`y_i \in {-1, 1}` or regression targets.

        :param holdout: (``list``) List of indices to exclude from alignment.
        """

        # Expand kernel interfaces to kernel matrices
        expand = lambda K: K[:, :] if isinstance(K, Kinterface) else K
        Hs     = list(map(expand, Ks))

        # Assert correct dimensions
        assert Ks[0].shape[0] == len(y)

        # Fit MKL model
        if self.method in self.supervised:
            self.mkl_model.fit(Hs, y, holdout=holdout)
        else:
            self.mkl_model.fit(Hs)

        if self.low_rank:
            self.X = hstack(map(lambda e: sqrt(e[0]) * e[1],
                                zip(self.mkl_model.mu, Hs)))

            if self.method in self.centered:
                self.X = center_kernel_low_rank(self.X)
                self.X[where(isnan(self.X))] = 0

            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel="linear", )

            # Fit ridge on the examples minus the holdout set
            inxs = list(set(range(Hs[0].shape[0])) - set(holdout))
            self.ridge.fit(self.X[inxs], y[inxs])
            self.trained = True

        else:
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel=self.mkl_model, )

            # Fit ridge on the examples minus the holdout set
            inxs = array(list(set(range(Hs[0].shape[0])) - set(holdout)))
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            self.ridge.fit(inxs, y[inxs])
            self.trained = True


    def predict(self, inxs):
        """
        Predict values for data on indices inxs (transcductive setting).

        :param inxs: (``list``) Indices of samples to be used for prediction.

        :return: (``numpy.ndarray``) Vector of prediction of regression targets.
        """
        assert self.trained

        if self.low_rank:
            return self.ridge.predict(self.X[inxs])
        else:
            inxs = array(inxs)
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            return self.ridge.predict(inxs).ravel()
Example #50
diff_fano = diff_fano[~np.isnan(diff_fano)]
pna.cal_CohenD(diff_fano)


""" temp_script """
x, y = data_tuning_mean[:,:,-10:-1].ravel(), data_tuning_std[:,:,-10:-1].ravel()
kr = KernelRidge()
kr.fit(x,y)

kr = kernel_regression.KernelReg(y, x, ['c'], bw=[np.std(x)/5])
plt.plot(x,y, '.')
plt.plot(x, kr.fit(x)[0], 'o')


for i in range(data_tuning_mean.shape[0]):
    plot_kr(data_tuning_mean[i, :, -3].ravel(), data_tuning_std[i, :, -3].ravel(), color=colors[i], linestyle=linestyles[i])


""" legacy code """
data_neuro_cur = signal_align.select_signal(data_neuro_spk, chan_filter=range( 0,32), sortcode_filter=range(1,4))
data_neuro_cur = signal_align.select_signal(data_neuro_spk, chan_filter=range(33,48), sortcode_filter=range(1,4))
plt.figure()
for i in range(data_neuro_cur['data'].shape[2]):
    pass  # loop body truncated in the source
Example #51
0
File: embedding.py Project: capoe/soapxx
def parametrize_environment_specific(settings, rerun):
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl
    # PATHS - for example:
    # { "xyz_file": "data_esol/structures.xyz",
    #   "soap_file": "data_esol/structures.soap",
    #   "kmat_file": "data_esol/kernel.npy",
    #   "targets_file": "data_esol/targets.npy",
    #   "range_file": "data_esol/range.json",
    #   "weights_file": "data_esol/weights.npy" }
    paths = copy.deepcopy(settings["paths"])
    for p, v in paths.items():
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])
    # SOAP
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])
    # KERNEL
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])
    # TARGETS
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])
    # MODEL
    regr_options = settings["regression_options"]
    if rerun or not os.path.isfile(paths["weights_file"]):
        log << "Make target: %s" % paths["weights_file"] << log.endl
        y_avg = np.average(targets)
        krr = KernelRidge(
            alpha=regr_options["lreg"],
            kernel='precomputed')
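        # K**xi raises the precomputed kernel matrix elementwise; the exponent
        # acts as a sharpening power on the SOAP similarities before the ridge solve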
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])
    if rerun or not os.path.isfile(paths["range_file"]):
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights, regr_options["xi"])
        json.dump(delta_Ys, open(paths["range_file"], "w"))
    else:
        delta_Ys = json.load(open(paths["range_file"]))
Example #52
0
#from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
print y
print
X = np.random.randn(n_samples, n_features)
print X
#clf = SVR(C=1.0, epsilon=0.2)
clf = KernelRidge(alpha=1.0)
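#with the default 'linear' kernel, KernelRidge is ordinary ridge regression
#solved in the dual; alpha is the L2 penalty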
clf.fit(X, y) 

print y[1]
print clf.predict(X[1:2])  # slice keeps the 2-D shape predict() expects
Example #53
0
	#####################################################################
	# --- RUN THE MODEL: FOR A GIVEN SPLIT AND EACH PARAMETER TRIAL --- #
	#####################################################################

	# For each parameter trial
	for i in xrange(trials):

		# For regression use the Kernel Ridge method
		if model_type == "regression":

			print "\n Starting experiment for trial %d and parameter alpha = %3f\n " % (i, alpha_grid[i])

			# Fit the kernel ridge model
			KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])
			KR.fit(K_train, y_train)

			# predict on the validation and test set
			y_pred = KR.predict(K_val)
			y_pred_test = KR.predict(K_test)
			
			# adjust prediction: needed because the training targets have been normalized
			y_pred = y_pred * float(y_train_std) + y_train_mean
			y_pred_test = y_pred_test * float(y_train_std) + y_train_mean

			# root mean squared error on validation
			rmse = np.sqrt(mean_squared_error(y_val, y_pred))
			perf_all_val.append(rmse)

			# root mean squared error in test 
			rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
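Example #54
0
# NOTE: the top of this example (imports and the head of kernel_ridge_regression)
# was lost in extraction. The header below is a minimal reconstruction, assuming
# a linear kernel and the standard dual solve (K + lam*I) alpha = y; everything
# from "sum = 0." onward is original.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split  # sklearn.cross_validation at the time

def kernel_ridge_regression(X_train, y_train, lam, kernel=np.dot):
    # Dual coefficients: alpha = (K + lam*I)^{-1} y
    K = np.array([[kernel(a, b) for b in X_train] for a in X_train])
    alpha = np.linalg.solve(K + lam * np.eye(len(X_train)), y_train)
    # The returned closure evaluates f(x) = sum_i alpha_i k(x_i, x)
    def f(x):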
        sum = 0.
        for i in range(0,len(X_train)):
            sum += alpha[i] * kernel(X_train[i],x)
        return sum
    return f

def score(f, X_test, y_test):
    error = 0.
    for i in range(0, len(X_test)):
        prediction = f(X_test[i])
        if isinstance(prediction,np.ndarray):
            prediction = prediction[0]
        error += pow((prediction - y_test[i]),2)
    return error/len(X_test)

# Make up data
X, y, true_coefficient = make_regression(n_samples=80, n_features=30,
                                         n_informative=20, noise=10, coef=True,
                                         random_state=20140210)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

# Run Scikit Kernel Ridge Regression
clf = KernelRidge()
clf.fit(X_train,y_train)
print 'SCIKIT: mean square test error:', score( clf.predict, X_test, y_test)

# Run this implementation
f = kernel_ridge_regression(X_train,y_train,1)
score_val = score(f, X_test, y_test)
print 'Custom: mean square test error:', score_val
Example #55
0
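    # (preamble truncated in the source: alpha, sigma_position, sigma_amino_acid
    # and n are hyperparameters defined in the omitted lines above)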
    # Choose the number of predicted peptides and their length
    n_predictions = 1000
    y_length = 5

    # Max time (seconds) for the branch and bound search
    max_time = 500

    print('String maximization model on BPPs dataset')
    gs_kernel = GenericStringKernel(AminoAcidFile.blosum62_natural, sigma_position, sigma_amino_acid, n,
                                    is_normalized=True)
    alphabet = gs_kernel.alphabet
    dataset = load_bpps_dataset()

    # Use a regression algorithm to learn the weights first
    print('Learning the regression weights ...')
    learner = KernelRidge(alpha, kernel='precomputed')
    gram_matrix = gs_kernel(dataset.X, dataset.X)
    learner.fit(gram_matrix, dataset.y)
    learned_weights = learner.dual_coef_

    # We can then use the string maximization model with the learned weights
    print('Branch and bound search for the top {} peptides of length {} ...'.format(n_predictions, y_length))
    model = StringMaximizationModel(alphabet, n, gs_kernel, max_time)
    model.fit(dataset.X, learned_weights, y_length)
    peptides, bioactivities = model.predict(n_predictions)

    print('\n')
    print('Peptides | Predicted bioactivities')
    for peptide, bioactivity in zip(peptides, bioactivities):
        print(peptide, bioactivity)
Example #56
0
class Learner():

    path = 'matrices/'
    inputF = 'inputs.npy'
    stateF = 'states.npy'
    itrF = 'itr.npy'
    inptFile = os.path.join(path, inputF)
    stateFile = os.path.join(path, stateF)
    itrFile = os.path.join(path, itrF)

    itr = np.array([])

    useSHIV = False
    THRESH = 0.45
    ahqp_solver_g = AHQP(sigma=6)
    ahqp_solver_b = AHQP(sigma=5,nu=1e-3)


    def trainModel(self, s=None, a=None):
        """
        Trains model on given states and actions.
        Uses neural net or SVM based on global
        settings.
        """
        states, actions = self.states[3:], self.actions[3:]
        #print "states.shape"
        #print states.shape
        #print "actions.shape"
        #print actions.shape

        if len(self.itr) == 0:
            self.itr = np.array([states.shape[0]])
        else:
            self.itr = np.hstack((self.itr, states.shape[0]))

        '''if states.shape[0] > 2700.0:
            f = os.path.join(self.path, 'statesToValidate.npy')
            np.save(f, states)
            IPython.embed()'''

        
        fits = []

        #actions = actions.ravel()
        self.clf = KernelRidge(alpha=1.0, kernel='rbf')
        print "SIZE: ", states.shape
        self.clf.fit(states, actions)
        #IPython.embed()
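        # Per-state residual norms follow; states whose prediction error exceeds
        # the median are flagged as "bad" and later fed to the SHIV detectors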
        actions_pred = self.clf.predict(states)
        bad_state = np.zeros(actions_pred.shape[0])
        for i in range(actions_pred.shape[0]):
            fit =  LA.norm(actions_pred[i,:] - actions[i,:])
            fits.append(fit)

        med = np.median(np.array(fits))
        for j, fit in enumerate(fits):
            if fit > med:
                bad_state[j] = 1

        IPython.embed()

        if self.useSHIV:
            self.labels = np.zeros(states.shape[0])+1.0
            self.scaler = preprocessing.StandardScaler().fit(states)
            states_proc = self.scaler.transform(states)
            
            good_labels = bad_state == 0.0         
            states_g = states_proc[good_labels,:] 

            bad_labels = bad_state == 1.0 
            states_b = states_proc[bad_labels,:] 
            #IPython.embed()
            self.ahqp_solver_g.assembleKernel(states_g, np.zeros(states_g.shape[0])+1.0)
            self.ahqp_solver_b.assembleKernel(states_b, np.zeros(states_b.shape[0])+1.0)
            #IPython.embed()
            self.ahqp_solver_g.solveQP()
            self.ahqp_solver_b.solveQP()

            #score = self.clf.score(states, actions)
            #print score
        
        self.plot(fits, states, med)

    def askForHelp(self,state):
        if self.useSHIV:
            state = self.scaler.transform(state)
            if self.ahqp_solver_b.predict(state)==1.0:
                return -1.0
            else:
                return self.ahqp_solver_g.predict(state)
        else:
            return -1

    
    def plot(self, fits, states, threshold):
        index = range(len(states))
        t = np.ones(len(index)) * threshold
        plt.figure(1)
        plt.plot(index, fits, color='b', linewidth=4.0)
        plt.plot(index, t, color='r', linewidth=4.0)
        plt.ylabel('Fit')
        plt.xlabel('Index of State')

        plt.show()


    def getAction(self, state):
	"""
	Returns a prediction given the input state.
	Uses neural net or SVM based on global
	settings.
	"""

	return self.clf.predict(state)


    def initModel(self, useSHIV):
        self.useSHIV = useSHIV
        try:
            self.states = np.load(self.stateFile)
            self.actions = np.load(self.inptFile)
        except IOError:
            self.states = np.array([-8,8.75,0,-12,22,0,-15,21.13043404,
                                     0,-12,18.52173996,0,-15,14.173913,
                                     0,-12,8.08695698,0,0,0,0,0])
            self.actions = np.array([0,0,0,0])
        #self.trainModel(self.states, self.actions)

    def updateModel(self, s, a):
        self.states = np.vstack((self.states, s))
        self.actions = np.vstack((self.actions, a))
        #self.trainModel(self.states, self.actions)

    def saveModel(self):
        path = 'matrices/oldData/'
        currT = strftime("%Y-%m-%d %H:%M:%S", gmtime())

        inptFileOut = os.path.join(path, 'inputs' + currT + '.npy')
        stateFileOut = os.path.join(path, 'states' + currT + '.npy')

        np.save(stateFileOut, self.states)
        np.save(inptFileOut, self.actions)
        np.save(self.itrFile, self.itr)
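Example #57
0
# (the top of this example was truncated; the indented line below is evidently
# the body of a per-column standardization loop, whose head is reconstructed
# here as an assumption)
for k in range(Xh_tr.shape[1]):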
    Xh_tr[:, k] = (Xh_tr[:, k] - mea_h[k]) / sig_h[k]

############## Kernel Ridge Regression ########################################
from sklearn.kernel_ridge import KernelRidge
import scipy.io as sio

mf = sio.loadmat(
    "/data/ISOTROPIC/regression/KRR_rbf_cv_alpha_gamma_sspacing4_tspacing6.mat", squeeze_me=True, struct_as_record=False
)
KRR_alpha_opt = mf["KRR_alpha_opt"]
print("Optimal alpha:", KRR_alpha_opt)
KRR_gamma_opt = mf["KRR_gamma_opt"]
print("Optimal gamma:", KRR_gamma_opt)

kr = KernelRidge(kernel="rbf", alpha=KRR_alpha_opt, gamma=KRR_gamma_opt)
kr.fit(Xl_tr, Xh_tr)


############## Prediction and save to file ####################################
import os
from netCDF4 import Dataset  # used below; its import fell outside the extracted snippet

try:
    os.remove("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc")
except OSError:
    pass
ncfile2 = Dataset("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc", "w")

ncfile1 = Dataset("/data/ISOTROPIC/data/data_downsampled4.nc", "r")

# create the dimensions
ncfile2.createDimension("Nt", Nt)
Example #58
0
File: qm7_atom.py Project: crcollins/molml
# (the snippet's import header was truncated; KernelRidge, MAE and
# LocalEncodedBond are referenced below, so plausible imports are restored here)
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_absolute_error as MAE

from molml.atom import LocalEncodedBond
from molml.kernel import AtomKernel

from utils import load_qm7


if __name__ == "__main__":
    # This is just boiler plate code to load the data
    Xin_train, Xin_test, y_train, y_test = load_qm7()

    # Look at just a few examples to be quick
    n_train = 200
    n_test = 200
    Xin_train = Xin_train[:n_train]
    y_train = y_train[:n_train]
    Xin_test = Xin_test[:n_test]
    y_test = y_test[:n_test]

    gamma = 1e-7
    alpha = 1e-7
    kern = AtomKernel(gamma=gamma, transformer=LocalEncodedBond(n_jobs=-1),
                      n_jobs=-1)
    K_train = kern.fit_transform(Xin_train)
    K_test = kern.transform(Xin_test)
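    # AtomKernel yields precomputed Gram matrices (train x train and test x train),
    # which is why the ridge model below is built with kernel="precomputed"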

    clf = KernelRidge(alpha=alpha, kernel="precomputed")
    clf.fit(K_train, y_train)
    train_error = MAE(clf.predict(K_train), y_train)
    test_error = MAE(clf.predict(K_test), y_test)
    print("Train MAE: %.4f Test MAE: %.4f" % (train_error, test_error))
    print()
Example #59
0
File: kron_rls.py Project: aatapa/RLScore
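# (header truncated in the source: it presumably imported numpy as np, time,
# sklearn's KernelRidge, RLScore's KronRLS and GaussianKernel, and defined the
# random_data() helper used below)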
if __name__=="__main__":
    #trains Kronecker RLS for different sample sizes
    #comparing CPU time and verifying that the learned
    #dual coefficients are same for both methods
    regparam = 1.0
    for size in [10, 20, 40, 60, 80, 100, 500, 1000, 2000, 4000, 6000]:
        X1, X2, y = random_data(size, 100)
        kernel1 = GaussianKernel(X1, gamma=0.01)
        K1 = kernel1.getKM(X1)
        kernel2 = GaussianKernel(X2, gamma=0.01)
        K2 = kernel2.getKM(X2)
        start = time.clock()
        rls = KronRLS(K1=K1, K2=K2, Y=y, regparam=regparam)
        dur = time.clock() - start
        print("RLScore pairs: %d, CPU time: %f" %(size**2, dur))
            #forming the full Kronecker product kernel matrix quickly
            #becomes infeasible
        if size <=100:
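            # note the operand order: np.kron(K2, K1) matches the vectorization
            # convention used for core_coef's transpose-reshape below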
            K = np.kron(K2, K1)
            start = time.clock()
            ridge = KernelRidge(alpha=regparam, kernel="precomputed")
            ridge.fit(K, y)
            dur = time.clock() - start
            print("sklearn pairs: %d, CPU time: %f" %(size**2, dur))
            sklearn_coef = ridge.dual_coef_
            core_coef = rls.predictor.A.reshape(K1.shape[0], K2.shape[0]).T.ravel()
            print("Are the coefficients same: %r" %np.allclose(sklearn_coef, core_coef))
        else:
            print("sklearn: too much data")
        print "*****"