import time

import xgboost as xgb
from sklearn.base import clone
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

# StackedClassiferRegressor, HazardBins, Gini, and the train DataFrame / columns
# list are assumed to be defined elsewhere
seed = 47

n = 100        # number of boosting iterations (trees)
n_split = 50   # min_samples_split for the base regressor template

# base gradient-boosted regressor template; copies are cloned from it below
reg = GradientBoostingRegressor(n_estimators=n, min_samples_split=n_split, learning_rate=0.5, random_state=seed)

# clone the template regressor, giving each copy its own min_samples_split
regs = []
for i in (10, 10, 10, 5):
    r = clone(reg)
    r.set_params(min_samples_split=i)
    regs.append(r)

# gradient-boosted classifier used for the classification stage of the stack
clf = GradientBoostingClassifier(n_estimators=500, min_samples_split=5, learning_rate=0.5, random_state=seed)

stacked = StackedClassiferRegressor(clf, regs=regs)
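# (assumed behavior: the classifier assigns each row to a hazard bin and the
#  cloned regressors refine that into a numeric Hazard prediction)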

# get bins and labels
bins, labels = HazardBins(3)

# train the stacked classifier & regressor and time the fit
start_time = time.time()
stacked.fit(train[columns], train["Hazard"], bins, labels)
stop_time = time.time()
print "training time: %.2fs" % (stop_time - start_time)

haz_pred = stacked.predict(train[columns])
train_gini = Gini(haz_pred, train.Hazard)
print "Gini (training):", train_gini
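# note: this Gini is computed on the training data, so it will be optimistic
# compared to a held-out or cross-validated score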
# print 'Importances: ', clf.feature_importances_
    
# second pass: the same stacking setup with an xgboost classifier in place of the sklearn one
clf = xgb.XGBClassifier(max_depth=5, n_estimators=n, silent=False,
                        learning_rate=0.3, gamma=0, seed=seed)

stacked = StackedClassiferRegressor(clf, gbm)  # note: gbm is not created in this snippet and must already be defined

# get bins and labels
bins, labels = HazardBins(3)
#labels = range(len(bins)-1)

# train the stacked classifier & regressor and time the fit
start_time = time.time()
# pass plain numpy arrays (.as_matrix(); .values in newer pandas) to the xgboost-backed stack
stacked.fit(train[columns].as_matrix(), train['Hazard'].as_matrix(), bins, labels)
stop_time = time.time()
print "training time: %.2fs" % (stop_time - start_time)

haz_pred = stacked.predict(train[columns].as_matrix())
train_gini = Gini(haz_pred, train.Hazard)
print 'Gini (training):', train_gini
#print 'Importances: ', clf.feature_importances_
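
# Reference sketch: `Gini` above is assumed to be the normalized Gini metric with
# the call signature used here (predictions first, actuals second). If it is not
# defined elsewhere, a stand-in could look like the sketch below; the function
# names are hypothetical, not the author's.
import numpy as np

def _gini(actual, pred):
    # rank rows by predicted value (descending), breaking ties by original order
    data = np.asarray(np.c_[actual, pred, np.arange(len(actual))], dtype=float)
    data = data[np.lexsort((data[:, 2], -data[:, 1]))]
    # cumulative share of the total actual hazard captured while walking the ranking
    cum_share = data[:, 0].cumsum().sum() / data[:, 0].sum()
    return (cum_share - (len(actual) + 1) / 2.0) / len(actual)

def normalized_gini(pred, actual):
    # scale by the Gini of a perfect ranking so that 1.0 is the best achievable score
    return _gini(actual, pred) / _gini(actual, actual)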