Code example #1
import copy
import sys

import pandas as pd
from sklearn.decomposition import KernelPCA
from sklearn.kernel_ridge import KernelRidge


# assign_val is a project-specific helper defined elsewhere in the original file.
def nonlinear_covariate_mat(dset, labels, meta, tissue, cov_matrix):
    '''Calculate non-linear covariates for a dataset of nucleotide-resolution RNA-seq.
        -dset: pandas DataFrame of RNA-seq expression values.
        -labels: DataFrame used for assigning variables in meta.
        -meta: DataFrame of potential covariates.
        -tissue: tissue class to subset on.
        -cov_matrix: DataFrame where the R^2 values are stored.'''
    y_model = copy.deepcopy(dset)
    x_model = copy.deepcopy(labels)
    cov = copy.deepcopy(meta)
    cov_list = cov.columns
    x_model = x_model[x_model[0] == tissue]
    y_model = y_model[y_model.index.isin(x_model.index)]
    pca = KernelPCA(n_components=None, kernel='rbf', random_state=0)
    pc = pca.fit_transform(y_model)
    x_ = copy.deepcopy(x_model)
    for w in cov_list:
        sys.stderr.write(tissue + " " + w + "\n")
        x_model = copy.deepcopy(x_)
        covariate = pd.DataFrame(cov.loc[:, w])
        if ((w.startswith('MH') and cov[w].dtype == 'float64')
                or covariate[w].dtype == object
                or (covariate[w].dtype == 'int64' and w != 'AGE')):
            # Categorical covariate: integer-encode, one-hot encode, then fit
            # kernel ridge against the kernel-PCA components.
            covariate[w] = covariate.loc[:, w].astype('category').cat.codes
            x_model = assign_val(x_model, covariate, w, 0)
            x_model = pd.get_dummies(x_model)
            lm = KernelRidge(alpha=1, kernel='rbf')
            lm.fit(x_model, pc)
            r2 = lm.score(x_model, pc)
            cov_matrix.loc[w, tissue] = r2
        else:
            # Continuous covariate (e.g. AGE): rescale and fit directly.
            x_model = assign_val(x_model, covariate, w, 0)
            if x_model[0].max() != 0.0:
                x_model = x_model / x_model.max()
            lm = KernelRidge(alpha=1, kernel='rbf')
            lm.fit(x_model.values.reshape(-1, 1), pc)
            r2 = lm.score(x_model.values.reshape(-1, 1), pc)
            cov_matrix.loc[w, tissue] = r2
    return cov_matrix
Code example #2
def kernel_ridge(g):
    """Fit an RBF kernel ridge model and report train/test scores.

    :param g: gamma parameter of the RBF kernel
    :return: None
    """
    # split_data and plot_assignments are helpers defined elsewhere in the original file.
    x_train, x_test, y_train, y_test = split_data()
    kern = KernelRidge(kernel='rbf', gamma=g).fit(x_train, y_train)
    print("Kernel score training: ", kern.score(x_train, y_train))
    print("Kernel score test: ", kern.score(x_test, y_test))

    predictions = kern.predict(x_test)
    plot_assignments(predictions, y_test)
Code example #3
def choose_krr_kernel(train_x, test_x, train_y, test_y):
    kernels = ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid']
    kernel_scores = []
    best_k_score = 0.0
    best_k = ""

    for k in kernels:
        krr = KernelRidge(kernel=k)
        krr.fit(train_x, train_y)
        score = krr.score(test_x, test_y)
        if score > best_k_score:
            best_k_score = score
            best_k = k
        kernel_scores.append(score)

    print(kernel_scores)
    print("Best kernel: " + str(best_k))
    print("Score received: " + str(best_k_score))

    plt.bar(kernels, kernel_scores)
    plt.xlabel('Kernel')
    plt.ylabel('Score')
    plt.xticks(np.arange(len(kernels)), kernels)
    plt.title('Tuning Kernel Hyperparameter for KRR')
    plt.show()
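
A note on the pattern above: choosing the kernel on the same test split that is also used for reporting can overfit the choice. A cross-validated sweep is more robust; the following is a minimal sketch (not part of the original code; train_x and train_y are assumed to be the same arrays used above):

from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV

# Sweep the same kernels with 5-fold cross-validation on the training data.
param_grid = {'kernel': ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid']}
search = GridSearchCV(KernelRidge(), param_grid, cv=5)
search.fit(train_x, train_y)
print(search.best_params_, search.best_score_)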
Code example #4
    def train_krrl_linear(self, data):
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion
        #print("El set de train tiene {} filas y {} columnas".format(x_tr.shape[0],x_tr.shape[1]))
        #print("El set de validacion tiene {} filas y {} columnas".format(x_val.shape[0],x_val.shape[1]))

        print('Start training KernerRidge with linear kernel...')
        start_time = self.timer()

        krrl = KernelRidge(alpha=1)
        krrl.fit(x_tr, y_tr)
        print("The R2 is: {}".format(krrl.score(x_tr, y_tr)))
        #print("The alpha chosen by CV is: {}".format(krrl.alpha_))
        self.timer(start_time)

        print("Making prediction on validation data")
        y_val = np.expm1(y_val)
        y_val_pred = np.expm1(krrl.predict(x_val))
        mae = mean_absolute_error(y_val, y_val_pred)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        os.makedirs('pickles', exist_ok=True)

        with open('pickles/krrlLinearK.pkl', 'wb') as f:
            pickle.dump(krrl, f)

        print('Making prediction and saving into a csv')
        y_test = krrl.predict(self.x_test)

        return y_test
Code example #5
import math

from sklearn import metrics
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split


def prin(X, y, file, dic):
    t = 100
    #clf = MLPRegressor(solver=dic['solver'], activation=dic['activation'], hidden_layer_sizes=eval(dic['hls']), batch_size=dic['batch_size'], max_iter=dic['max_iter'])
    #clf = LinearRegression()
    # Note: degree is only used by the polynomial kernel and is ignored for 'laplacian'.
    clf = KernelRidge(alpha=0.001, kernel='laplacian', degree=18)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=float(dic['test_size']))
    clf.fit(X_train, y_train)

    print('Training size', len(X_train))
    print('Testing size', len(X_test))
    #scores = cross_val_score(clf, X, y, cv=5)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # R^2 on the training data (clf.score returns R^2, not classification accuracy).
    accuracy = clf.score(X_train, y_train)
    print('accuracy', accuracy, '\n')
    print('RMSE', math.sqrt(metrics.mean_squared_error(y_test, clf.predict(X_test))))
    MAE = metrics.mean_absolute_error(y_test, clf.predict(X_test))
    print('MAE', MAE)
    #X_test, y_test = X[-t:], y[-t:]
    #file = file[-t:]
    pr = clf.predict(X_test)
    print('Filename                 Percentage Error         Actual Value      Predicted Value           Difference\n')
    for i in range(len(y_test)):
        if y_test[i] == 0.0:
            y_test[i] = 0.0000001  # avoid division by zero in the percentage error
        predi = str(round(((pr[i] - y_test[i]) / y_test[i]) * 100, 2)) + ' %'
        print(file[i] + ' ' * (20 - len(file[i])), ' ' * (20 - len(predi)) + predi, ' ' * (20 - len(str(y_test[i]))) + str(y_test[i]), ' ' * (20 - len(str(round(pr[i], 2)))) + str(round(pr[i], 2)), ' ' * (20 - len(str(round((y_test[i] - pr[i]), 4)))) + str(round((y_test[i] - pr[i]), 4)))
    #print('Mean square Error', mean_squared_error(X, pr))
    #print('R2 score', r2_score(X, pr))
    #test(X, y, file, clf.coef_[0], clf.intercept_[0])
    #plot_g(clf)
    return MAE
Code example #6
def choose_krr_gamma(train_x, test_x, train_y, test_y):
    gammas = [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    gamma_scores = []
    best_g_score = 0.0
    best_g = ""

    for g in gammas:
        krr = KernelRidge(kernel="laplacian", gamma=g)
        krr.fit(train_x, train_y)
        score = krr.score(test_x, test_y)
        if score > best_g_score:
            best_g_score = score
            best_g = g
        gamma_scores.append(score)

    print(gamma_scores)
    print("Best gamma: " + str(best_g))
    print("Score received: " + str(best_g_score))

    plt.plot(gammas, gamma_scores)
    plt.xlabel('Gamma')
    plt.ylabel('Score')
    plt.title('Tuning Gamma Hyperparameter for KRR')
    plt.show()
Code example #7
def choose_krr_alpha(train_x, test_x, train_y, test_y):
    alphas = [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    alpha_scores = []
    best_a_score = 0.0
    best_a = ""

    for a in alphas:
        krr = KernelRidge(kernel="laplacian", alpha=a)
        krr.fit(train_x, train_y)
        score = krr.score(test_x, test_y)
        if score > best_a_score:
            best_a_score = score
            best_a = a
        alpha_scores.append(score)

    print(alpha_scores)
    print("Best alpha: " + str(best_a))
    print("Score received: " + str(best_a_score))

    plt.plot(alphas, alpha_scores)
    plt.xlabel('Alpha')
    plt.ylabel('Score')
    plt.title('Tuning Alpha Hyperparameter for KRR')
    plt.show()
Code example #8
def krr_predict(krr_params, train_x, test_x, train_y, test_y):
    print("Starting KRR prediction")
    a = krr_params['alpha']
    g = krr_params['gamma']
    k = krr_params['kernel']

    krr = KernelRidge(alpha=a, kernel=k, gamma=g)
    krr.fit(train_x, train_y)
    # Compute the test-set R^2 once instead of three times.
    score = krr.score(test_x, test_y)
    print("KRR Score: ", score)

    cv_score = cross_val_score(krr, test_x, test_y, cv=10)
    print("Cross-Val Standard Deviation: ", np.std(cv_score))

    return score
Code example #9
File: TensorMolData.py Project: matk86/TensorMol
 def KRR(self):
     from sklearn.kernel_ridge import KernelRidge
     ti, to = self.LoadData(True)
     print "KRR: input shape", ti.shape, " output shape", to.shape
     #krr = KernelRidge()
     krr = KernelRidge(alpha=0.0001, kernel='rbf')
     trainsize = int(ti.shape[0] * 0.5)
     krr.fit(ti[0:trainsize, :], to[0:trainsize])
     predict = krr.predict(ti[trainsize:, :])
     print(predict.shape)
     krr_acc_pred = np.zeros((predict.shape[0], 2))
     krr_acc_pred[:, 0] = to[trainsize:].reshape(to[trainsize:].shape[0])
     krr_acc_pred[:, 1] = predict.reshape(predict.shape[0])
     np.savetxt("krr_acc_pred.dat", krr_acc_pred)
     print "KRR train R^2:", krr.score(ti[0:trainsize, :], to[0:trainsize])
     print "KRR test  R^2:", krr.score(ti[trainsize:, :], to[trainsize:])
     return
Code example #10
def train_model(df, featureset_keys, kernel="linear", alpha=1.0, gamma=None, degree=None, coef0=None):

    # Setup Parameters for Model
    kr_args = {"kernel": kernel, "alpha": alpha}

    # Validate parameters for polynomial
    if kernel == "polynomial":
        if degree is None or coef0 is None:
            print("Must provide a parameter for degree and coef0")
            return None
        else:
            kr_args["gamma"] = gamma
            kr_args["degree"] = degree
            kr_args["coef0"] = coef0

    # Initialize the figure size
    plt.figure(figsize=(20, 10))

    # Store the results of each training run
    predictions = []
    scores = []

    # Save the best model to return
    best_model = None
    baseline = 0.0

    i = 0
    for train, test in RepeatedKFold(n_splits=5, n_repeats=1).split(df):

        # Split dataset
        train_x, train_y = (df.iloc[train])[featureset_keys], (df.iloc[train])['happiness_score']
        test_x, test_y = (df.iloc[test])[featureset_keys], (df.iloc[test])['happiness_score']

        # Initialise model
        kr_model = KernelRidge(**kr_args)

        # Train model
        kr_model.fit(train_x, train_y)

        # Evaluate model
        pred_y = kr_model.predict(test_x)
        score = kr_model.score(test_x, test_y)

        # Save if better than the previous best
        if score > baseline:
            best_model = kr_model
            baseline = score
        predictions.append(pred_y)
        plt.scatter(test_y, pred_y, label=f"iter {i}")
        scores.append(score)
        i = i + 1

    plt.plot(df['happiness_score'], df['happiness_score'], label='actual')
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
    return best_model, plt, scores
Code example #11
def regression_biking(X_train, X_test, y_train, y_test):
    # time to train!
    clf_ridge = KernelRidge(kernel='rbf')
    clf_ridge.fit(X_train, y_train)

    clf_lasso = Lasso()
    clf_lasso.fit(X_train, y_train)

    clf_linear = LinearRegression()
    clf_linear.fit(X_train, y_train)

    # show me the numbers!
    print('[TEST SCORES]')
    print('Kernel Ridge : {}'.format(clf_ridge.score(X_test, y_test)))
    print('Lasso : {}'.format(clf_lasso.score(X_test, y_test)))
    print('Linear : {}'.format(clf_linear.score(X_test, y_test)))

    # training scores
    print('[TRAINING SCORES]')
    print('Kernel Ridge : {}'.format(clf_ridge.score(X_train, y_train)))
    print('Lasso : {}'.format(clf_lasso.score(X_train, y_train)))
    print('Linear : {}'.format(clf_linear.score(X_train, y_train)))

    print('[RUNNING GRID SEARCH]')
    # cross validation
    models = {
        'Ridge': Ridge(),
        'Lasso': Lasso()
    }

    params = {
        'Lasso': {
            'alpha': [1, 5, 10, 20, 30, 50, 70, 100, 1_000, 10_000]
        },
        'Ridge': {
            'alpha': [1, 5, 10, 20, 30, 50, 70, 100, 1_000, 10_000]
        }
    }

    grid = EstimatorSelectionHelper(models, params)
    grid.fit(X_train, y_train, n_jobs=2)
    print(grid.score_summary(sort_by='mean_score', num_rows_per_estimator=5))
Code example #12
def regr(stock, show=False, save=False):

    training_set, test_set, training_label, test_label = get_train_test_data(
        stock, l_max=500, l_ratio=4.0, s_max=5, binary=False)

    # Decision Tree regressor
    '''
    clf = tree.DecisionTreeRegressor()
    #clf = tree.DecisionTreeClassifier(criterion='entropy')
    #clf.fit(data_set[0:training_size],b_label[0:training_size])
    clf.fit(training_set,training_label)
    #print 'Decision Tree:',clf.score(data_set[training_size+1:training_size+1+ length_of_validation], b_label[training_size+1:training_size+1+length_of_validation])
    print 'Decision Tree:',clf.score(test_set,test_label)
    '''
    kernel_rgr = KernelRidge(alpha=1.0, kernel='linear')
    kernel_rgr.fit(training_set, training_label)
    kernel_score = kernel_rgr.score(test_set, test_label)
    print('Kernel Regression:', kernel_score)

    # Gaussian Process Regressor
    '''
    gpr = GaussianProcessRegressor(alpha = 1e-3,n_restarts_optimizer = 10,normalize_y=True)
    gpr.fit(training_set,training_label)
    
    print "Gaussain Process Regression:",gpr.score(test_set,test_label)
    '''

    # SVM with Polynomial model
    '''
    #clf_svm = svm.SVC()
    clf_svm = svm.SVR(kernel='poly', C=1e2, degree=3)
    
    clf_svm.fit(training_set,training_label)
    #print 'SVM:',clf_svm.score(data_set[training_size+1:training_size+1+ length_of_validation], b_label[training_size+1:training_size+1+length_of_validation])
    svm_score = clf_svm.score(test_set,test_label)
    print 'SVM:',svm_score
    '''
    if show:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        #ax.plot(clf_svm.predict(test_set),label='SVM')
        #ax.plot(clf.predict(test_set),label = 'DT')
        ax.plot(test_label, label='Actual data')
        ax.plot(kernel_rgr.predict(test_set), label='Kernel Rgr')
        ax.set_xlabel("Relative Time Stamp")
        ax.set_ylabel("Normalized Price")
        ax.legend()
        if save:
            #if kernel_score >=0.3:
            fig.savefig("regr_output\\" + stock + '_regr_output_' +
                        str(round(kernel_score, 4)) + '.png',
                        dpi=500)
            plt.close('all')
    return kernel_score  #,svm_score]
Code example #13
    def execute(self, dataset):
        # X is the feature matrix, y is the target that depends on X
        X, y = dataset

        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.validation_fraction)
        X0, X1 = X[:, 0], X[:, 1]
        xx, yy = self.make_meshgrid(X0, X1)

        labels = set(y)
        colors = ListedColormap([
            plt.get_cmap(name="rainbow")(each)
            for each in np.linspace(0, 1, len(labels))
        ])

        classifier = KernelRidge(alpha=self.alpha,
                                 kernel=self.kernel,
                                 degree=self.degree,
                                 coef0=self.coef0,
                                 gamma=self.gamma)

        classifier.fit(X_train, y_train)
        xxr, yyr = xx.ravel(), yy.ravel()
        cxy = np.c_[xxr, yyr]
        Z = classifier.predict(cxy)
        Z = Z.reshape(xx.shape)

        plt.clf()
        plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
        plt.scatter(X_train[:, 0],
                    X_train[:, 1],
                    c=y_train,
                    cmap=colors,
                    s=20,
                    edgecolors='k')
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    alpha=0.5,
                    c=y_test,
                    cmap=colors,
                    s=20,
                    edgecolors='k')
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())
        score = classifier.score(X_test, y_test)
        plt.title('Kernel Ridge Classification\n score: ' +
                  str(round(score, 5)))
        plt.show()
Code example #14
 def _function(self, x):
     # The first two entries of x are log10-scaled hyperparameters; the
     # remaining entries are per-sample weights.
     thetaParam = np.power(10, x[0])
     lambdaParam = np.power(10, x[1])
     x = x[2:]
     (Features, targets) = self._prepareDataset()
     kf = KFold(n_splits=10, shuffle=True)
     scoreList = list()
     for train_index, test_index in kf.split(Features):
         regressor = KernelRidge(alpha=lambdaParam,
                                 kernel='rbf',
                                 gamma=thetaParam)
         regressor.fit(X=Features[train_index],
                       y=targets[train_index],
                       sample_weight=x[train_index])
         scoreList.append(
             regressor.score(X=Features[test_index], y=targets[test_index]))
     # Mean R^2 across the 10 folds.
     return np.mean(scoreList)
Code example #15
File: Regresion.py Project: sanframar/tfg
def seleccionarMejorAlgoritmoRegresion(X_train, y_train, X_test, y_test):
    scores = {}
    '''Import all of the algorithms we are going to try'''
    from sklearn.kernel_ridge import KernelRidge
    from sklearn import linear_model
    from sklearn.linear_model import Ridge
    from sklearn.tree import DecisionTreeRegressor
    
    '''Declare and train each of the algorithms'''
    '''Kernel Ridge'''
    kernelRidge = KernelRidge(kernel="polynomial")
    kernelRidge.fit(X_train, y_train)
    scoreKernelRidge = kernelRidge.score(X_test, y_test)
    scores[kernelRidge] = scoreKernelRidge
    
    '''Bayesian Ridge'''
    bayesianRidge = linear_model.BayesianRidge()
    bayesianRidge.fit(X_train, y_train)
    scoreBayesianRidge = bayesianRidge.score(X_test, y_test)
    scores[bayesianRidge] = scoreBayesianRidge
    
    '''Linear Regression'''
    linearRegression = linear_model.LinearRegression()
    linearRegression.fit(X_train, y_train)
    scoreLinearRegression = linearRegression.score(X_test, y_test)
    scores[linearRegression] = scoreLinearRegression
    
    '''Ridge Regression'''
    ridge = Ridge(alpha=1.0)
    ridge.fit(X_train, y_train)
    scoreRidge = ridge.score(X_test, y_test)
    scores[ridge] = scoreRidge
    
    '''Decision Tree Regression'''
    decisionTreeRegressor = DecisionTreeRegressor(random_state=0)
    decisionTreeRegressor.fit(X_train, y_train)
    scoreDecisionTreeRegressor = decisionTreeRegressor.score(X_test, y_test)
    scores[decisionTreeRegressor] = scoreDecisionTreeRegressor
    
    import operator
    return max(scores.items(), key=operator.itemgetter(1))[0]
Code example #16
X, Y = boston.data, boston.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.3)
'''
    Kernel ridge regression:
        Takes the l2-regularized linear model (ridge regression) and adds the kernel trick.
        In ridge regression, substitute w* = ∑βz, i.e. solve for β in place of w,
        and rewrite the cost function accordingly.
        Closed-form dual solution: β = (λI + K)^-1 · y
        Characteristics:
            Fairly fast on medium-sized datasets, but struggles on large ones:
            training time complexity is O(n^3), which is quite high;
            prediction time complexity is O(n).
'''

rg = KernelRidge(alpha=1,
                 kernel='linear',
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None)
rg.fit(X_train, Y_train)
Y_pre = rg.predict(X_test)
rg.score(X_test, Y_test)
'''
    alpha               regularization strength (penalty coefficient)
    kernel              which kernel function to use
    gamma               kernel coefficient used by several of the kernels
    degree              degree of the polynomial kernel
    coef0               independent term for the polynomial and sigmoid kernels
    kernel_params       extra parameters passed to a callable kernel
'''
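
To make the dual formula in the comment block concrete: KernelRidge fits β = (K + αI)^-1 · y and predicts with K(x, X_train) · β. A small self-contained check (an illustrative sketch, not part of the original example):

import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.pairwise import linear_kernel

rng = np.random.RandomState(0)
X_demo = rng.randn(20, 3)
y_demo = rng.randn(20)

model = KernelRidge(alpha=1.0, kernel='linear').fit(X_demo, y_demo)

# beta = (K + alpha*I)^-1 y; predictions on the training points are K @ beta.
K = linear_kernel(X_demo, X_demo)
beta = np.linalg.solve(K + 1.0 * np.eye(len(X_demo)), y_demo)
assert np.allclose(model.predict(X_demo), K @ beta)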
Code example #17
# kf (a KFold splitter), test, predictions0, predictions1 and n_splits are
# defined earlier in the original script.
score = 0

oof_predictions = np.zeros(X.shape[0])
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_valid = X[train_index, :], X[test_index, :]
    y_train, y_valid = y[train_index], y[test_index]

    clf = KernelRidge(alpha=.6)
    clf.fit(X_train, y_train)

    pred0 = clf.predict(X)
    pred1 = clf.predict(test)
    oof_predictions[test_index] = clf.predict(X_valid)
    predictions0[:, fold] = pred0
    predictions1[:, fold] = pred1
    score += clf.score(X_train, y_train)
    print('Fold %d: Score %f' % (fold, clf.score(X_train, y_train)))


prediction0 = predictions0.mean(axis=1)
prediction1 = predictions1.mean(axis=1)
score /= n_splits
oof_score = r2_score(y, oof_predictions)

print('=====================')
print('Final Score %f' % score)
print('Final Out-of-Fold Score %f' % oof_score)
print('=====================')

print("Creating layer 1 prediction CSV files for training and test")
submission         = pd.read_csv('T:/RNA/Baltimore/Jason/ad_hoc/mb/input/sample_submission.csv')
Code example #18
scores = cross_val_score(regr, data.df[inputVariables].values, data.df['count'].values)
print("Linear Regression cross validation score: ", scores.mean())
regr.fit(X_train_sum, y_train_sum)
print("Linear Regression training score: ", regr.score(X_train_sum, y_train_sum))
print("Linear Regression testing score: ", regr.score(X_test_sum, y_test_sum))



##### Kernel Ridge and Support Vector Regression
#####
## Finding the best parameters
alphas = [1, 1e-1, 1e-2, 1e-3]
for a in alphas:
	kr = KernelRidge(kernel='rbf', alpha=a)
	kr.fit(X_train_sum, y_train_sum)
	print("Kernel Ridge train score: ", kr.score(X_train_sum, y_train_sum), " for alpha = %s" %a)
	print("Kernel Ridge test score: ", kr.score(X_test_sum, y_test_sum), " for alpha = %s" %a)


### Using GridSearchCV
param_grid = {
	'alpha': [1, 1e-1, 1e-2],
	'gamma': np.logspace(-2, 2, 5)
}
GSKernelRidge = GridSearchCV(KernelRidge(kernel='rbf'), param_grid=param_grid)
GSKernelRidge.fit(X_train_sum, y_train_sum)
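
After fitting, the winning combination and its cross-validated score can be read back from the search object (a short illustrative addition, using attributes GridSearchCV actually exposes; the printed values are hypothetical):

print(GSKernelRidge.best_params_)  # e.g. {'alpha': 0.1, 'gamma': 1.0}
print(GSKernelRidge.best_score_)   # mean cross-validated R^2 of the best setting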




Code example #19
# clfknn is a k-nearest-neighbours model created earlier in the original script.
clfknn.fit(X_train, y_train)

# Ridge regression
clfridge = Ridge(alpha=2)
clfridge.fit(X_train, y_train)

# Kernel Ridge regression
clfkrr = KernelRidge(alpha=0.5)
clfkrr.fit(X_train, y_train)

X_test = X[1000:1800]
y_test = y[1000:1800]

confidenceknn = clfknn.score(X_test, y_test)
confidenceridge = clfridge.score(X_test, y_test)
confidencekrr = clfkrr.score(X_test, y_test)

best_score = max(confidenceknn, confidenceridge, confidencekrr)

print('Score for KNN confidence is', confidenceknn)
print('Score for Ridge Regression confidence is', confidenceridge)
print('Score for Kernel Ridge Regression confidence is', confidencekrr)
print('The highest score is', best_score)

forecast_set = clfknn.predict(X_lately)
dfreg['Forecast'] = np.nan

last_date = dfreg.iloc[-1].name
last_unix = last_date
next_unix = last_unix + datetime.timedelta(days=1)
Code example #20
File: main.py Project: BurritoZz/dataScience
print('Quadratic Discriminant Analysis')
quadDisc = QuadraticDiscriminantAnalysis()
quadDisc.fit(X_train, y_train)
y_test_pred = quadDisc.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = quadDisc.score(X_test, y_test)
no_selection_performance.append(
    ('Quadratic Discriminant Analysis', score, matrix))

print('Kernel Ridge Regression')
kerRid = KernelRidge(alpha=1.0)
kerRid.fit(X_train, y_train)
y_test_pred = kerRid.predict(X_test)
y_test_pred = [int(round(x)) for x in y_test_pred]
matrix = confusion_matrix(y_test, y_test_pred)
score = kerRid.score(X_test, y_test)
no_selection_performance.append(('Kernel Ridge Regression', score, matrix))

print('SVC')
svc = svm.SVC(C=1,
              class_weight=None,
              coef0=0,
              gamma='scale',
              kernel='rbf',
              shrinking=True,
              tol=1e-1)
svc.fit(X_train, y_train)
y_test_pred = svc.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = svc.score(X_test, y_test)
no_selection_performance.append(('SVC', score, matrix))
Code example #21
File: train_model.py Project: sefabey/wine
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split

seed = 45

# data prep =====
df = pd.read_csv("wine_quality.csv")
y = df.pop("quality")

X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.3, random_state=seed)

model1 = RandomForestRegressor(max_depth=3, random_state=seed)
model1.fit(X_train, y_train)

train_score_rf = model1.score(X_train, y_train) * 100
test_score_rf = model1.score(X_test, y_test) * 100

with open("metrics.txt", "w") as outfile:
    outfile.write("Random Forest Training variance explained: %2.1f%% \n" % train_score_rf) 
    outfile.write("Random Forest Test variance explained: %2.1f%% \n" % test_score_rf)

model2 = KernelRidge(alpha=1)
model2.fit(X_train, y_train)
train_score_kr = model2.score(X_train, y_train) * 100
test_score_kr = model2.score(X_test, y_test) * 100

with open("metrics.txt", "a") as outfile:
    outfile.write("Kernel Ridge Training variance explained: %2.1f%% \n" %train_score_kr) 
    outfile.write("Kernel Ridge Test variance explained: %2.1f%% \n" %test_score_kr)

Code example #22
from sklearn.kernel_ridge import KernelRidge
import numpy as np

from importation_pandas import importcsv
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix



setX, setY = importcsv()
X_train, X_test, y_train, y_test = train_test_split(setX, setY, test_size=0.3)


clf = KernelRidge(alpha=1.0)
clf.fit(X_train, y_train)
# KernelRidge predicts continuous values; round them to integer labels so that
# confusion_matrix accepts them.
result1 = np.rint(clf.predict(X_test)).astype(int)
print(clf.score(X_test, y_test))
print(confusion_matrix(y_test, result1))
Code example #23
fig = plt.figure()
ax = fig.add_subplot(111)
red = ax.scatter(Xtrain, ytrain, color='red', marker='+')
knn_plot = ax.plot(Xtest, knn.predict(Xtest), color='green')
kridge_plot = ax.plot(Xtest, kridge.predict(Xtest), color='blue')
base = ax.plot(Xtest, dummy.predict(Xtest), color='orange', linestyle='--')
ax.set_ylabel("output Y", fontsize=20)
ax.set_xlabel("input X", fontsize=20)
fig.legend(["kNN", "KernelRidge", "baseline", "train"],
           scatterpoints=1,
           loc='right',
           ncol=2,
           fontsize=15)
ax.set_title(
    "kNN & KernelRidge Predictions", fontsize=20)


# Compute the R^2 score of each model on the training data
knn_accuracy = knn.score(Xtrain, ytrain)
kridge_accuracy = kridge.score(Xtrain, ytrain)
baseline_accuracy = dummy.score(Xtrain, ytrain)


# Print outputs
print("base model accuracy score: ", baseline_accuracy,
      " - knn model accuracy score: ", knn_accuracy,
      " - kridge accuracy: ", kridge_accuracy)

plt.show()
Code example #24
# (This excerpt begins mid-way through a GridSearchCV construction in the original file.)
        np.logspace(-5, 5)
    })
estimator.fit(X_train, Y_train)

best_alpha = estimator.best_params_['alpha']
best_gamma = estimator.best_params_['gamma']

# Train with the best parameters
estimator2 = KernelRidge(alpha=best_alpha,
                         coef0=1,
                         gamma=best_gamma,
                         kernel='laplacian',
                         kernel_params=None)
estimator2.fit(X_train, Y_train)
y_predicted = estimator2.predict(X_test)
r2 = estimator2.score(X_test, Y_test, sample_weight=None)
print('r^2 = ', r2)

mae = mean_absolute_error(y_predicted, Y_test)
print('MAE =', mae)

# Plotting
n_test = len(Y_test)

Y_testing = []
sg_testing = []
for i in range(n_test):
    for j in range(N_data):
        if Y_test[i, 0] == Y_sp[j, 0]:
            sg_testing.append(sg_sp[j])
            break
Code example #25
#     backarr2=backarr2.reshape(-1, 1)
#     regressor2.fit(backarr2, outarr)
#     mid=regressor2.predict(x)
#     #mid2=classificator.score(x, y)
#     #print (x, y)
#     backarr2=np.append(backarr2, x)
#     outarr=np.append(outarr, y)
#     out=np.append(out, mid)
#     #scores=np.append(scores,mid2)


# regressor and regressor2 are estimators created earlier in the original script.
regressor.fit(backarr2,outarr)
out=regressor.predict(backarr2)
regressor2.fit(backarr2,outarr)
out2=regressor2.predict(backarr2)
res=regressor.score(backarr2,outarr)
res2=regressor2.score(backarr2,outarr)

time1,time2=[],[]
for i in range(dt.date(2008,2,25).toordinal(),dt.date(2014,2,3).toordinal(),7): #make an array of mondays
    time1=np.append(time1,i)
for i in range(dt.date(2014,10,27).toordinal(),dt.date(2018,8,21).toordinal(),7): time2=np.append(time2,i)
# Shift the date ordinals by a fixed offset (presumably matching the encoding used at training time).
for i in np.nditer(time1, op_flags=['readwrite']): i[...] = i - 733000
for i in np.nditer(time2, op_flags=['readwrite']): i[...] = i - 733000
time1=time1.reshape(-1,1)
time2=time2.reshape(-1,1)
interpol=regressor.predict(time1) #interpolate the original data that has gaps in it to fill the said gaps
interpol2=regressor.predict(time2)
time=np.concatenate((time1,time2))
result=np.concatenate((interpol,interpol2))
Code example #26
File: regresores.py Project: seba-arriola/entrega1
      np.mean(cv_results2['test_neg_median_absolute_error']))
print('r2:', np.mean(cv_results['test_r2']))

scaler2 = StandardScaler()
scaler2.fit(X_train)
# Use the newly fitted scaler (the original referenced an earlier `scaler` object).
transformed_X_train = scaler2.transform(X_train)
transformed_X_test = scaler2.transform(X_test)

rbf_kernel = RBF(length_scale=10)
ker_regr_rbf = KernelRidge(kernel=rbf_kernel)

ker_regr_rbf.fit(transformed_X_train, y_train)

ker_rbf_pred = ker_regr_rbf.predict(transformed_X_test)

print("score: %.5f" % (ker_regr_rbf.score(transformed_X_test, y_test)))
print("Error cuadratico medio: %.5f" %
      mean_squared_error(y_test, ker_rbf_pred))

# COMPARISON PLOTS
plt.rcParams["figure.figsize"] = (20, 10)
plt.plot(ker_rbf_pred[1:500])
plt.plot(y_reg[1:500])
plt.xlabel('Samples')
plt.ylabel('Time [s]')
plt.legend(['predictions', 'data'], loc='upper left')
plt.show()

plt.rcParams["figure.figsize"] = (20, 10)
plt.plot(ker_rbf_pred[501:1000])
plt.plot(y_reg[501:1000])
Code example #27
pylab.plot(x,y_test116[:n],lw=2,label='Gradient Boosting')
pylab.plot(x,y_test117[:n],lw=2,label='Random Forest')
pylab.plot(x,y_test114[:n],lw=2,label='Bagging')
pylab.plot(x,y_test115[:n],lw=2,label='Ada Boost')
pylab.plot(x,y_test113[:n],lw=2,label='ExtraTree')
pylab.xlabel('Observations'); pylab.ylabel('Targets')
pylab.title('Regressors. Test Results. Boston')
pylab.legend(loc=2,fontsize=10); pylab.show()

"""combining regression with kernels"""

# Kernel Ridge; Toy regression 2
reg25=KernelRidge(); reg25.fit(X_train8,y_train8)
y_train825=reg25.predict(X_train8) 
y_test825=reg25.predict(X_test8)
print(reg25.score(X_test8,y_test8))
pylab.figure(figsize=(12,5)); n=30; x=range(n)
pylab.scatter(x,y_test8[:n,0],marker='*',s=200,
              color='darkblue',label='Real data 1')
pylab.scatter(x,y_test8[:n,1],marker='*',s=200,
              color='darkgreen',label='Real data 2')
pylab.plot(x,y_test825[:n,0],lw=2,
           color='steelblue',label='Kernel Ridge 1')
pylab.plot(x,y_test825[:n,1],lw=2,
           color='seagreen',label='Kernel Ridge 2')
pylab.xlabel('Observations'); pylab.ylabel('Targets') 
pylab.title('Kernel Ridge Regressor. Test Results. Toy Regression 2')
pylab.legend(loc=2,fontsize=10); pylab.show()

"""# Unsupervised Learning"""
Code example #28
def main():
    X = pd.read_csv(
        '../data/BlackFriday.csv'
    )  # names =("User_ID", "Product_ID", "Gender", "Age", "Occupation", "City_Category", "Stay_In_Current_City_Years", "Marital_Status,", "Product_Category_1","Product_Category_2","Product_Category_3", "Purchase" ))
    N, d = X.shape
    print(N, d)
    # fill missing values with 0
    # (?) should we also report the percentage of missing values?
    X = X.fillna(0)
    # change gender to 0 and 1
    X['Gender'] = X['Gender'].apply(change_gender)
    # change age to 0 to 6
    X['Age'] = X['Age'].apply(change_age)
    # change city categories to 0 to 2
    X['City_Category'] = X['City_Category'].apply(change_city)
    # change the year to integer
    X['Stay_In_Current_City_Years'] = X['Stay_In_Current_City_Years'].apply(
        change_year)

    #predict gender
    y = np.zeros((N, 1))
    y = X.values[:, 2]
    y = y.astype('int')
    X1 = X
    ID = ['User_ID', 'Product_ID', 'Gender']
    X1 = X1.drop(ID, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X1,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=42)
    model = LogisticRegression(C=1,
                               fit_intercept=False,
                               solver='lbfgs',
                               multi_class='multinomial')
    model.fit(X_train, y_train)
    print("LogisticRegression(softmax) Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("LogisticRegression(softmax) Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))

    model = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
    model.fit(X_train, y_train)

    print("logLinearClassifier Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("logLinearClassifier Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))

    #predict the product category1  based on other information.
    y2 = np.zeros((N, 1))
    y2 = X.values[:, 8]
    y2 = y2.astype('int')
    X2 = X
    ID = [
        'User_ID', 'Product_ID', 'Product_Category_1', 'Product_Category_2',
        'Product_Category_3'
    ]
    X2 = X2.drop(ID, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X2,
                                                        y2,
                                                        test_size=0.2,
                                                        random_state=42)

    model = KNeighborsClassifier(n_neighbors=5, metric='cosine')
    model.fit(X_train, y_train)

    y_pred = model.predict(X_train)
    tr_error = np.mean(y_pred != y_train)

    y_pred = model.predict(X_test)
    te_error = np.mean(y_pred != y_test)
    print("Training error of KNN to predict age: %.3f" % tr_error)
    print("Testing error of KNN to predict age: %.3f" % te_error)
    # Training error of KNN to predict age: 0.363
    #Testing error of KNN to predict age: 0.496

    # Use decision tree to predict
    e_depth = 20
    s_depth = 1

    train_errors = np.zeros(e_depth - s_depth)
    test_errors = np.zeros(e_depth - s_depth)

    for i, d in enumerate(range(s_depth, e_depth)):
        print("\nDepth: %d" % d)

        model = DecisionTreeClassifier(max_depth=d,
                                       criterion='entropy',
                                       random_state=1)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_train)
        tr_error = np.mean(y_pred != y_train)

        y_pred = model.predict(X_test)
        te_error = np.mean(y_pred != y_test)
        print("Training error: %.3f" % tr_error)
        print("Testing error: %.3f" % te_error)

        train_errors[i] = tr_error
        test_errors[i] = te_error

    x_vals = np.arange(s_depth, e_depth)
    plt.title("The effect of tree depth on testing/training error")
    plt.plot(x_vals, train_errors, label="training error")
    plt.plot(x_vals, test_errors, label="testing error")
    plt.xlabel("Depth")
    plt.ylabel("Error")
    plt.legend()

    fname = os.path.join("..", "figs", "trainTest_category1.pdf")
    plt.savefig(fname)
    print("\nFigure saved as '%s'" % fname)

    model = RandomForestClassifier(criterion="entropy",
                                   n_estimators=5,
                                   max_features=5)
    model.fit(X_train, y_train)
    print("RandomForest Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("RandomForest Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))
    #RandomForest Training error 0.027
    #RandomForest Validation error 0.157
    tree = DecisionTreeClassifier(max_depth=13,
                                  criterion='entropy',
                                  random_state=1)
    tree.fit(X_train, y_train)
    y_pred = tree.predict(X_train)
    tr_error = np.mean(y_pred != y_train)

    y_pred = tree.predict(X_test)
    te_error = np.mean(y_pred != y_test)
    print("Decision Tree Training error : %.3f" % tr_error)
    print("Decision Tree Validation error: %.3f" % te_error)
    #Depth: 11
    #Training error: 0.127
    #Testing error: 0.131

    #use softmax classifier to predict occupation
    model = LogisticRegression(C=10000,
                               fit_intercept=False,
                               solver='lbfgs',
                               multi_class='multinomial')
    model.fit(X_train, y_train)
    print("LogisticRegression(softmax) Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("LogisticRegression(softmax) Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))
    #LogisticRegression(softmax) Training error 0.651
    #LogisticRegression(softmax) Validation error 0.652

    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    from sklearn.gaussian_process.kernels import ConstantKernel, RBF
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
    from sklearn.metrics import mean_squared_error
    poly = PolynomialFeatures(degree=4)
    X_train_sub = X_train[:1000]
    y_train_sub = y_train[:1000]
    X_train_ = poly.fit_transform(X_train_sub)
    model = LinearRegression()
    model.fit(X_train_, y_train_sub)
    model.score(X_train_, y_train_sub, sample_weight=None)
    y_pred = model.predict(X_train_)
    tr_error = mean_squared_error(y_pred, y_train_sub)

    # Evaluate on the polynomial-expanded test features; the original predicted on
    # the raw test matrix, whose column count does not match the trained model.
    y_pred = model.predict(poly.transform(X_test))
    te_error = mean_squared_error(y_pred, y_test)
    print("Training error : %.3f" % tr_error)
    print("Validation error: %.3f" % te_error)

    #kernel = DotProduct() + WhiteKernel()
    y2 = np.zeros((N, 1))
    y2 = X.values[:, 8]
    y2 = y2.astype('int')
    X2 = X
    ID = [
        'User_ID', 'Product_ID', 'Product_Category_1', 'Product_Category_2',
        'Product_Category_3'
    ]
    X2 = X2.drop(ID, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X2,
                                                        y2,
                                                        test_size=0.02,
                                                        random_state=42)
    gpr = GaussianProcessRegressor(kernel=None,
                                   random_state=0).fit(X_train, y_train)
    gpr.score(X_train, y_train)
    y_pred = gpr.predict(X_train)
    tr_error = mean_squared_error(y_pred, y_train)
    y_pred = gpr.predict(X_test)
    te_error = mean_squared_error(y_pred, y_test)
    clf = KernelRidge(alpha=0.5)
    clf.fit(X_train_sub, y_train_sub)
    print("KernelRidge training R^2: %.3f" %
          clf.score(X_train_sub, y_train_sub))
Code example #29
train = pandas.read_csv("steel_composition_train.csv", sep=",")
test = pandas.read_csv("steel_composition_test.csv", sep=",")

names = [
    "id", "Carbon", "Nickel", "Manganese", "Sulfur", "Chromium", "Iron",
    "Phosphorus", "Silicon"
]

data_train = train[names]
targets_train = train["Strength"]

tr_len = len(targets_train)

krr2 = KernelRidge(alpha=1, kernel="polynomial", degree=2, coef0=1)
krr2.fit(data_train, targets_train)
krr2_score = krr2.score(data_train, targets_train)

K2TR = krr2.predict(data_train)
E = K2TR - targets_train
E = np.asarray(E)
RMSE2 = np.sqrt(np.dot(np.transpose(E), E) / tr_len)

krr3 = KernelRidge(alpha=1, kernel="polynomial", degree=3, coef0=1)
krr3.fit(data_train, targets_train)
krr3_score = krr3.score(data_train, targets_train)

K3TR = krr3.predict(data_train)
E = K3TR - targets_train
E = np.asarray(E)
RMSE3 = np.sqrt(np.dot(np.transpose(E), E) / tr_len)
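
The hand-rolled RMSE above (the square root of E·E divided by the sample count) is the same quantity sklearn computes; a quick equivalence check (illustrative, reusing K3TR, targets_train and RMSE3 from above):

from sklearn.metrics import mean_squared_error

assert np.isclose(RMSE3, np.sqrt(mean_squared_error(targets_train, K3TR)))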
Code example #30
def splitData(dataMat, dataLabel):
    # Shuffle before splitting (the original computed the permutation but never applied it).
    indices = np.random.permutation(len(dataMat))
    dataMat = np.array(dataMat)[indices]; dataLabel = np.array(dataLabel)[indices]
    dataTrainMat = dataMat[100:]; dataTrainLabel = dataLabel[100:]
    dataTestMat = dataMat[:100]; dataTestLabel = dataLabel[:100]
    return dataTrainMat, dataTrainLabel, dataTestMat, dataTestLabel




dataMat,dataLabel =  createAbaloneData()
dataTrainMat ,dataTrainLabel,dataTestMat,dataTestLabel = splitData(dataMat,dataLabel)

#knn
""" knn = KNeighborsClassifier()
knn.fit(dataTrainMat,dataTrainLabel)
print 'knn accucy',knn.score(dataTestMat,dataTestLabel) """

# kernel ridge regression
clf = KernelRidge(alpha=2.0)
clf.fit(dataTrainMat, dataTrainLabel)
print('kernel ridge R^2:', clf.score(dataTestMat, np.asarray(dataTestLabel, dtype=float)))

#tree 
""" tree = DecisionTreeClassifier(random_state=2)
tree.fit(dataTrainMat,dataTrainLabel)
print 'tree accucy',tree.score(dataTestMat,dataTestLabel) """




Code example #31
    testSet.append(trainSet[randIndex])
    del trainSet[randIndex]

# training set
for dataIndex in trainSet:
    x_train.append(xArr[dataIndex])
    y_train.append(yArr[dataIndex])

# test set
for dataIndex in testSet:
    x_test.append(xArr[dataIndex])
    y_test.append(yArr[dataIndex])

print(x_train)
print(y_train)
print(x_test)
print(y_test)
"""

clf = KernelRidge()

clf.fit(xArr, yArr)
y_predict = clf.predict(xArr)
#y_predict_int = []
#for i in range(len(y_predict)):
#    y_predict_int.append(int(y_predict[i]))

print(yArr)
print(y_predict)
print(clf.score(xArr, yArr))