from sklearn.ensemble import GradientBoostingClassifier


def classify_gbc(data_sets, label_sets):
    # params = {'n_estimators': 100, 'max_depth': 4, 'min_samples_split': 2,
    #           'learning_rate': 0.01, 'loss': 'deviance', 'verbose': 0}
    # Grid-search the optimal GBC hyperparameters
    grid_search(data_sets, label_sets)
    # Best parameters found by the grid CV search: 100, 0.52
    params = {
        'n_estimators': 100,
        'max_depth': 4,
        'min_samples_split': 2,
        'learning_rate': 0.52,
        'loss': 'deviance',  # renamed to 'log_loss' in newer scikit-learn releases
        'verbose': 0,
    }
    clf = GradientBoostingClassifier(**params)
    clf.fit(data_sets, label_sets)
    # Note: this reports accuracy on the training data itself.
    print(clf.score(data_sets, label_sets))
    return clf
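The grid_search helper called above is not shown. A minimal sketch of what it might look like with scikit-learn's GridSearchCV, assuming the n_estimators / learning_rate grid implied by the comment (the candidate values are illustrative, not the original ones):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV


def grid_search(data_sets, label_sets):
    # Hypothetical parameter grid; the comment above only tells us that
    # n_estimators=100 and learning_rate=0.52 came out on top.
    param_grid = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 0.52, 1.0],
    }
    search = GridSearchCV(GradientBoostingClassifier(max_depth=4),
                          param_grid, cv=5, scoring='accuracy')
    search.fit(data_sets, label_sets)
    print(search.best_params_, search.best_score_)
    return search.best_params_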
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# For each feature f (outer loop over the feature list not shown), replace
# zero entries with the median of that feature within the same outcome group.
temp = groups[f].median()
for i in range(0, 768):
    if (dataset.loc[i, f] == 0) and (dataset.loc[i, 'outcome'] == 0):
        dataset.loc[i, f] = temp[0]
    if (dataset.loc[i, f] == 0) and (dataset.loc[i, 'outcome'] == 1):
        dataset.loc[i, f] = temp[1]

dataset = dataset.values
X = dataset[:, 0:len(dataset[0]) - 1]
Y = dataset[:, len(dataset[0]) - 1]

# Hyperparameter sweep for the gradient boosting classifier.
rows = []
for feats in range(2, 7):
    for dept in range(2, 6):
        for split in range(5, 40, 5):
            for leaf in range(7, 10):
                acc = 0  # reset per parameter combination
                for i in range(20):
                    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
                    classifier = GradientBoostingClassifier(min_samples_split=split,
                                                            max_depth=dept,
                                                            max_features=feats,
                                                            max_leaf_nodes=leaf)
                    classifier.fit(X_train, Y_train)
                    acc += classifier.score(X_test, Y_test)
                acc = acc / 20
                print('feats:', feats, 'Depth:', dept, 'split:', split,
                      'max_leaf:', leaf, 'acc:', acc * 100)
                rows.append({'feats': feats, 'depth': dept, 'split': split,
                             'max_leaf': leaf, 'acc': acc})

df = pd.DataFrame(rows, columns=['feats', 'depth', 'split', 'max_leaf', 'acc'])
df.to_csv('xgboost.csv', sep=',')
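The row-by-row imputation above can also be expressed without the explicit loop over all 768 rows. A minimal sketch, assuming `dataset` is the same DataFrame with an 'outcome' column, `groups` is `dataset.groupby('outcome')`, and zeros mark missing values in the given features:

import pandas as pd


def impute_zeros_by_outcome(dataset, features):
    """Replace zeros in each feature with the median of its outcome group."""
    for f in features:
        medians = dataset.groupby('outcome')[f].median()
        zero_mask = dataset[f] == 0
        dataset.loc[zero_mask, f] = dataset.loc[zero_mask, 'outcome'].map(medians)
    return dataset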
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Fill missing weather readings with the median of rows sharing the same condition.
groups = dataset.groupby('_conds')
field = ['_dewptm', '_heatindexm', '_hum', '_pressurem', '_tempm', '_vism', '_wdird', '_wspdm']
for f in field:
    print("field", f)
    temp = groups[f].median()
    for i in range(0, 100945):
        if pd.isnull(dataset.loc[i, f]):
            condition = dataset.loc[i, '_conds']
            dataset.loc[i, f] = temp[condition]
            print("values: ", dataset.loc[i, f], " ; ", temp[condition])

# Any values still missing fall back to the overall column median.
dataset['_heatindexm'].fillna(dataset['_heatindexm'].median(), inplace=True)
dataset['_hum'].fillna(dataset['_hum'].median(), inplace=True)
dataset['_tempm'].fillna(dataset['_tempm'].median(), inplace=True)
dataset['_vism'].fillna(dataset['_vism'].median(), inplace=True)

dataset = dataset.values
X = dataset[:, 1:len(dataset[0])]
Y = dataset[:, 0]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

for dept in range(5, 8):
    for feats in range(5, 8):
        classifier = GradientBoostingClassifier(max_depth=dept, max_features=feats)
        classifier.fit(X_train, Y_train)
        print("depth", dept, "feats", feats)
        # Note: this is the score on the training split, not the held-out test split.
        print("Score", classifier.score(X_train, Y_train))
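The per-condition median fill can also be done without iterating over all ~100k rows. A short sketch using pandas' groupby/transform, assuming the same `dataset` DataFrame and `field` list as above:

# Vectorized equivalent of the per-condition median imputation.
for f in field:
    dataset[f] = dataset[f].fillna(dataset.groupby('_conds')[f].transform('median'))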
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.ensemble import GradientBoostingClassifier

# Create some data
m = 10000
X = np.random.normal(size=(m, 10))
thresh = np.random.normal(size=10)
X_transformed = X * (X > thresh)
beta = np.random.normal(size=10)
y = (np.dot(X_transformed, beta) + np.random.normal(size=m)) > 0

# Train a gradient boosting classifier
model = GradientBoostingClassifier()
model.fit(X, y)
print(model.score(X, y))

# Inspect: rebuild predict_proba from the individual trees.
# Note: loss_._score_to_proba and init_.predict are private internals of older
# scikit-learn releases and have since been renamed or removed.
pred = model.predict_proba(X)
approx = model.loss_._score_to_proba(
    model.learning_rate * sum(est.predict(X) for est in model.estimators_[:, 0])
    + np.ravel(model.init_.predict(X)))
assert_array_almost_equal(pred, approx)
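On recent scikit-learn versions a similar check can be done through the public API. A minimal sketch, assuming a binary problem with the default log-loss, where the positive-class probability is the logistic transform of decision_function (reuses `model`, `X`, and the assertion helper from the snippet above):

from scipy.special import expit  # logistic sigmoid

# The raw staged score, i.e. the sum of tree predictions plus the prior.
raw = model.decision_function(X)
# For binary log-loss, predict_proba for the positive class is sigmoid(raw).
assert_array_almost_equal(model.predict_proba(X)[:, 1], expit(raw))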
# Fit each classifier once and report held-out accuracy.
sv.fit(X_train, y_train)
print(sv.score(X_test, y_test))              # acc 71.66%
RFC.fit(X_train, y_train)
print(RFC.score(X_test, y_test))             # acc 69.32%
GaussianN.fit(X_train, y_train)
print(GaussianN.score(X_test, y_test))       # acc 56.20%
KNC.fit(X_train, y_train)
print(KNC.score(X_test, y_test))             # acc 59.01%
gradientboost.fit(X_train, y_train)
print(gradientboost.score(X_test, y_test))   # acc 70.49%
xgboost.fit(X_train, y_train)
print(xgboost.score(X_test, y_test))         # acc 73.18%

# 5-fold cross-validation with the same scoring specification for each model.
sv_score_array = cross_validate(sv, res_wek, df['klasa'], cv=5, scoring=scoring)
rfc = cross_validate(RFC, res_wek, df['klasa'], cv=5, scoring=scoring)
MNB_score_array = cross_validate(GaussianN, res_wek, df['klasa'], cv=5, scoring=scoring)
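The estimator objects, the `scoring` specification, and the feature matrix `res_wek` / labels `df['klasa']` are assumed to be defined earlier in the script. A hedged sketch of definitions consistent with the calls above; the concrete classes and metrics are guesses from the variable names, not the original configuration:

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_validate, train_test_split
from xgboost import XGBClassifier

# Hypothetical estimator setup matching the names used above.
sv = SVC()
RFC = RandomForestClassifier()
GaussianN = GaussianNB()
KNC = KNeighborsClassifier()
gradientboost = GradientBoostingClassifier()
xgboost = XGBClassifier()

# Metrics collected during cross-validation (illustrative choice).
scoring = ['accuracy', 'precision_macro', 'recall_macro']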