# NOTE(review): the next three statements appear to be the tail of a
# `for i in ...:` loop whose header lies outside this view -- confirm their
# indentation against that loop before applying.
r = clone(reg)
# Fixed typo: this was `min_sample_split` (missing "s"), which only created an
# unused attribute on the clone, so the per-regressor value never took effect.
r.min_samples_split = i
regs.append(r)

# Stage 1 model: random-forest classifier handed to the stacked wrapper.
clf = RandomForestClassifier(n_estimators=n,
                             min_samples_split=5,
                             random_state=seed)
stacked = StackedClassiferRegressor(clf, regs=regs)

# get bins and labels
bins, labels = HazardBins(2)

# train the stacked classifier & regressor, timing the fit
start_time = time.time()
stacked.fit(train[columns], train['Hazard'], bins, labels)
stop_time = time.time()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training time: %.2fs" % (stop_time - start_time))

# In-sample evaluation: predictions are made on the same rows used for fitting.
haz_pred = stacked.predict(train[columns])
train_gini = Gini(haz_pred, train.Hazard)
print("Gini (training): %s" % (train_gini,))

# Disabled diagnostics, kept for reference:
#print 'Importances: ', clf.feature_importances_
#train['predicted'] = haz_pred
#train['haz_class'] = stacked._classes
#g = sns.factorplot(x="haz_class", y="predicted", data=train)
#g = sns.FacetGrid(train, col="haz_class")
#g = g.map(plt.hist, "predicted")

# Scatter of actual Hazard (x) against predicted Hazard (y).
plt.scatter(train.Hazard, haz_pred)
# Clone the base regressor `reg` once per entry, giving each clone its own
# min_samples_split value.
regs = []
for i in (10, 10, 10, 5):
    r = clone(reg)
    # Fixed typo: this was `min_sample_split` (missing "s"), which only
    # created an unused attribute on the clone, so the values above were
    # silently ignored and every clone kept reg's original setting.
    r.min_samples_split = i
    regs.append(r)

# Stage 1 model: gradient-boosting classifier handed to the stacked wrapper.
clf = GradientBoostingClassifier(n_estimators=500,
                                 min_samples_split=5,
                                 learning_rate=0.5,
                                 random_state=seed)
stacked = StackedClassiferRegressor(clf, regs=regs)

# get bins and labels
bins, labels = HazardBins(3)

# train the stacked classifier & regressor, timing the fit
start_time = time.time()
stacked.fit(train[columns], train["Hazard"], bins, labels)
stop_time = time.time()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training time: %.2fs" % (stop_time - start_time))

# In-sample evaluation: predictions are made on the same rows used for fitting.
haz_pred = stacked.predict(train[columns])
train_gini = Gini(haz_pred, train.Hazard)
print("Gini (training): %s" % (train_gini,))

# Disabled diagnostics, kept for reference:
# print 'Importances: ', clf.feature_importances_
# train['predicted'] = haz_pred
# train['haz_class'] = stacked._classes
# g = sns.factorplot(x="haz_class", y="predicted", data=train)
# g = sns.FacetGrid(train, col="haz_class")
# g = g.map(plt.hist, "predicted")

# Scatter of actual Hazard (x) against predicted Hazard (y).
plt.scatter(train.Hazard, haz_pred)
# Disabled per-regressor cloning, kept for reference:
# r = clone(reg)
# r.min_sample_split = i
# regs.append(r)

# Stage 1 model: XGBoost classifier; `gbm` (defined elsewhere) is passed as the
# second positional argument of the stacked wrapper.
clf = xgb.XGBClassifier(max_depth=5,
                        n_estimators=n,
                        silent=False,
                        learning_rate=0.3,
                        gamma=0,
                        seed=seed)
stacked = StackedClassiferRegressor(clf, gbm)

# get bins and labels
bins, labels = HazardBins(3)
#labels = range(len(bins)-1)

# train the stacked classifier & regressor, timing the fit
start_time = time.time()
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; both hand the model a plain NumPy array.
stacked.fit(train[columns].values, train['Hazard'].values, bins, labels)
stop_time = time.time()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training time: %.2fs" % (stop_time - start_time))

# In-sample evaluation: predictions are made on the same rows used for fitting.
haz_pred = stacked.predict(train[columns].values)
train_gini = Gini(haz_pred, train.Hazard)
print("Gini (training): %s" % (train_gini,))

# Disabled diagnostics, kept for reference:
#print 'Importances: ', clf.feature_importances_
#train['predicted'] = haz_pred
#train['haz_class'] = stacked._classes
#g = sns.factorplot(x="haz_class", y="predicted", data=train)
#g = sns.FacetGrid(train, col="haz_class")
#g = g.map(plt.hist, "predicted")

# Scatter of actual Hazard (x) against predicted Hazard (y).
plt.scatter(train.Hazard, haz_pred)