def classify_gbc(data_sets, label_sets):
    """Fit a GradientBoostingClassifier on the full data set and return it.

    Parameters
    ----------
    data_sets : array-like of shape (n_samples, n_features)
        Training feature matrix.
    label_sets : array-like of shape (n_samples,)
        Training labels.

    Returns
    -------
    GradientBoostingClassifier
        The fitted classifier.
    """
    # Grid-search the GBC hyper-parameters first (called for its report /
    # side effects; the chosen values below come from that search).
    grid_search(data_sets, label_sets)
    # Best parameters found by the grid CV search: n_estimators=100, lr=0.52.
    params = {
        'n_estimators': 100,
        'max_depth': 4,
        'min_samples_split': 2,
        'learning_rate': 0.52,
        # NOTE(review): 'deviance' was renamed to 'log_loss' in
        # scikit-learn 1.1 and removed in 1.3 — confirm the pinned version.
        'loss': 'deviance',
        'verbose': 0
    }
    clf = GradientBoostingClassifier(**params)
    clf.fit(data_sets, label_sets)
    # Training-set accuracy — optimistic, not a generalization estimate.
    print(clf.score(data_sets, label_sets))

    return clf
    # BUG FIX: removed six unreachable lines that followed this `return`
    # (a fragment from another snippet referencing undefined `groups`,
    # `f` and `dataset`); they could never execute.


# Flatten the DataFrame to a plain ndarray; the last column is the label.
dataset = dataset.values
X = dataset[:, :-1]   # feature columns
Y = dataset[:, -1]    # label column


# Grid search over GradientBoostingClassifier hyper-parameters, averaging
# held-out accuracy over 20 random 70/30 splits per configuration.
rows = []
for feats in range(2, 7):
    for dept in range(2, 6):
        for split in range(5, 40, 5):
            for leaf in range(7, 10):
                # BUG FIX: the accumulator must be reset for every
                # (split, leaf) combination. The original reset it only
                # once per `dept`, so after the first average every later
                # "average" still contained the previous result.
                acc = 0.0
                for _ in range(20):
                    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
                    classifier = GradientBoostingClassifier(
                        min_samples_split=split,
                        max_depth=dept,
                        max_features=feats,
                        max_leaf_nodes=leaf,
                    )
                    classifier.fit(X_train, Y_train)
                    acc += classifier.score(X_test, Y_test)
                acc = acc / 20
                print('feats:', feats, 'Depth:', dept, 'split:', split, 'max_leaf', leaf, 'acc:', acc * 100)
                rows.append({'feats': feats, 'depth': dept, 'split': split, 'max_leaf': leaf, 'acc': acc})

# BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# build the frame from the collected rows in one shot. This also drops the
# spurious all-zero seed row the original wrote into the CSV.
df = pd.DataFrame(rows, columns=['feats', 'depth', 'split', 'max_leaf', 'acc'])
df.to_csv('xgboost.csv', sep=',')
# --- Esempio n. 3 ("Example n. 3" — scrape-artifact separator from the snippet source; the stray "0" was a vote count) ---
# Group-wise median imputation: for each weather-condition group ('_conds'),
# fill missing numeric readings with that group's median value.
groups = dataset.groupby('_conds')
field = ['_dewptm', '_heatindexm', '_hum', '_pressurem', '_tempm', '_vism', '_wdird', '_wspdm']

for f in field:
    print("field", f)
    temp = groups[f].median()  # per-condition medians, indexed by condition
    # BUG FIX: iterate over the actual number of rows instead of the
    # hard-coded 100945, which skips rows (or raises KeyError) whenever the
    # dataset size differs. Assumes a default RangeIndex — as the original
    # 0..100944 loop already did.
    for i in range(len(dataset)):
        if(isnull(dataset.loc[i, f])):
            condition = dataset.loc[i, '_conds']
            dataset.loc[i, f] = temp[condition]
            print("values: ", dataset.loc[i, f], " ; ", temp[condition])
    # NOTE(review): this per-row .loc loop is very slow on ~100k rows; a
    # vectorized groupby(...).transform('median') + fillna would be far
    # faster — left as-is to preserve the per-fill debug output.

# Remaining NaNs (conditions whose whole group was NaN) fall back to the
# global column median.
dataset['_heatindexm'].fillna(dataset['_heatindexm'].median(), inplace=True)
dataset['_hum'].fillna(dataset['_hum'].median(), inplace=True)
dataset['_tempm'].fillna(dataset['_tempm'].median(), inplace=True)
dataset['_vism'].fillna(dataset['_vism'].median(), inplace=True)

# Flatten to ndarray: column 0 is the label, the remaining columns features.
dataset = dataset.values
X = dataset[:, 1:]
Y = dataset[:, 0]


# Small grid over tree depth / feature count for gradient boosting,
# using one fixed 70/30 train/test split for all configurations.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
for dept in range(5, 8):
    for feats in range(5, 8):
        classifier = GradientBoostingClassifier(max_depth=dept, max_features=feats)
        classifier.fit(X_train, Y_train)
        print("depth", dept)
        print("Score", classifier.score(X_train, Y_train))
        # BUG FIX: the original reported only training accuracy, which is an
        # optimistic estimate and useless for model selection; also report
        # the held-out accuracy the split was made for.
        print("Test score", classifier.score(X_test, Y_test))
# --- Esempio n. 4 ("Example n. 4" — scrape-artifact separator from the snippet source; the stray "0" was a vote count) ---
import numpy as np
# BUG FIX: `sklearn.ensemble.gradient_boosting` is a private module that was
# removed in scikit-learn 0.24; import from the public package instead.
from sklearn.ensemble import GradientBoostingClassifier
from numpy.ma.testutils import assert_array_almost_equal

# Create synthetic binary-classification data: each feature is zeroed unless
# it exceeds a per-feature threshold, then a noisy linear score defines labels.
m = 10000
X = np.random.normal(size=(m, 10))
thresh = np.random.normal(size=10)
X_transformed = X * (X > thresh)
beta = np.random.normal(size=10)
y = (np.dot(X_transformed, beta) + np.random.normal(size=m)) > 0

# Train a gradient boosting classifier on the raw (un-gated) features.
model = GradientBoostingClassifier()
model.fit(X, y)
# BUG FIX: Python 2 `print` statement is a SyntaxError on Python 3.
print(model.score(X, y))

# Sanity check: reconstruct predict_proba from the stagewise tree outputs —
# init prediction + learning_rate * sum of per-stage raw scores, mapped
# through the loss's score-to-probability transform.
pred = model.predict_proba(X)

# NOTE(review): `model.loss_._score_to_proba` and `model.init_.predict` are
# private scikit-learn internals whose names changed across versions —
# confirm against the pinned sklearn release before relying on this.
approx = model.loss_._score_to_proba(
    model.learning_rate *
    sum(est.predict(X) for est in model.estimators_[:, 0]) +
    np.ravel(model.init_.predict(X)))

assert_array_almost_equal(pred, approx)
# --- Esempio n. 5 ("Example n. 5" — scrape-artifact separator from the snippet source; the stray "0" was a vote count) ---
# Fit each candidate classifier on the training split and print its held-out
# accuracy. The percentage notes are figures observed on a previous run.
_candidates = [
    (sv, "SVC ~71.66%"),
    (RFC, "random forest ~69.32%"),
    (GaussianN, "Gaussian NB ~56.20%"),
    (KNC, "k-NN ~59.01%"),
    (gradientboost, "gradient boosting ~70.49%"),
    (xgboost, "XGBoost ~73.18%"),
]
for _clf, _note in _candidates:
    _clf.fit(X_train, y_train)
    print(_clf.score(X_test, y_test))

# 5-fold cross-validation of the SVC and random-forest models on the
# feature matrix `res_wek` against the 'klasa' labels, using the shared
# `scoring` specification.
sv_score_array = cross_validate(sv, res_wek, df['klasa'], cv=5, scoring=scoring)
rfc = cross_validate(RFC, res_wek, df['klasa'], cv=5, scoring=scoring)
MNB_score_array = cross_validate(GaussianN,
                                 res_wek,
                                 df['klasa'],
                                 cv=5,