# Fit a stacked classifier + regressors model on the training frame and
# report the wall-clock training time and the training-set Gini score.
seed = 47
n = 100
n_split = 50

# Template regressor; the regressors handed to the stacked model are clones
# of it that differ only in min_samples_split.
reg = GradientBoostingRegressor(
    n_estimators=n,
    min_samples_split=n_split,
    learning_rate=0.5,
    random_state=seed,
)

# NOTE: the parameter is spelled ``min_samples_split``.  Assigning to a
# misspelled attribute would silently create an unused attribute and leave
# every clone at ``n_split``; ``set_params`` validates the name and returns
# the estimator, so a typo fails loudly instead.
regs = [clone(reg).set_params(min_samples_split=i) for i in (10, 10, 10, 5)]

clf = GradientBoostingClassifier(
    n_estimators=500,
    min_samples_split=5,
    learning_rate=0.5,
    random_state=seed,
)
stacked = StackedClassiferRegressor(clf, regs=regs)

# get bins and labels
bins, labels = HazardBins(3)

# train the stacked classifier & regressors, timing the fit
start_time = time.time()
stacked.fit(train[columns], train["Hazard"], bins, labels)
stop_time = time.time()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training time: %.2fs" % (stop_time - start_time))

# Score on the same data the model was trained on (training Gini only).
haz_pred = stacked.predict(train[columns])
train_gini = Gini(haz_pred, train.Hazard)
print("Gini (training): %s" % (train_gini,))
# print 'Importances: ', clf.feature_importances_
# Fit the stacked model with an XGBoost classifier as the first stage and
# report the wall-clock training time and the training-set Gini score.
# ``n``, ``seed`` and ``gbm`` are not defined in this cell; they must already
# exist in the surrounding script/session.

# Disabled alternative, kept for reference: build the second-stage
# regressors from scikit-learn gradient boosting instead of using ``gbm``.
# (Parameter name corrected to ``min_samples_split``.)
# reg = GradientBoostingRegressor(n_estimators=n,
#                                 min_samples_split=n_split,
#                                 learning_rate=0.5,
#                                 random_state=seed)
# regs = [clone(reg).set_params(min_samples_split=i) for i in (10, 10, 10, 5)]

clf = xgb.XGBClassifier(
    max_depth=5,
    n_estimators=n,
    silent=False,
    learning_rate=0.3,
    gamma=0,
    seed=seed,
)
stacked = StackedClassiferRegressor(clf, gbm)

# get bins and labels
bins, labels = HazardBins(3)
# labels = range(len(bins)-1)

# ``.values`` replaces ``.as_matrix()``, which is deprecated and removed in
# pandas 1.0.  The feature matrix is converted once and reused for both
# fitting and prediction.
features = train[columns].values
target = train['Hazard'].values

# train the stacked classifier & regressor, timing the fit
start_time = time.time()
stacked.fit(features, target, bins, labels)
stop_time = time.time()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training time: %.2fs" % (stop_time - start_time))

# Score on the same data the model was trained on (training Gini only).
haz_pred = stacked.predict(features)
train_gini = Gini(haz_pred, train.Hazard)
print('Gini (training): %s' % (train_gini,))
# print 'Importances: ', clf.feature_importances_