# -- Fragment: decision-tree classification script (flattened onto one physical
# -- line in SOURCE; reformatted here with all code tokens preserved).
# NOTE(review): `dc` and `tree` are not imported within this fragment — presumably
# a project data-cleaning module and sklearn.tree, imported outside this chunk;
# confirm against the full file.
import MLScripts.Metrics as metrics
import MLScripts.Helpers as helper
import csv

# Load and clean train/test sets, then convert each DataFrame to a NumPy array.
train_frame = dc.dfCleanData(dc.loadTrainData(desc=False))
train_data = dc.convertPandasDataFrameToNumpyArray(train_frame)
test_frame = dc.dfCleanDataTest(dc.loadTestData())
test_data = dc.convertPandasDataFrameToNumpyArray(test_frame)
dc.describeDataframe(test_frame)

# Column 0 is the target (presumably "Survived", per the outputClass argument
# below — confirm), remaining columns are features.
decision_tree = tree.DecisionTreeClassifier(max_depth=4)
train_x = train_data[:, 1:]
train_y = train_data[:, 0]
decision_tree.fit(train_x, train_y)

# 5-fold cross-validation score of the tree over the full training data.
cv_score = metrics.crossValidationScore(decision_tree, train_x, train_y, cvCount=5)

# Hold-out split; a second tree with identical settings is trained on the split
# so that accuracy/RMSE metrics are measured on unseen rows.
xTrain, xTest, yTrain, yTest = metrics.traintestSplit(train_x, train_y, randomState=1)
cv_tree = tree.DecisionTreeClassifier(max_depth=4)
cv_tree.fit(xTrain,yTrain)
y_predict = cv_tree.predict(xTest)
ta = metrics.trainingAccuracy(yTest, y_predict)
rmse = metrics.rmse(yTest, y_predict)
nrmse = metrics.nrmse(yTest, y_predict)

# K-fold accuracy of the first tree using every feature column as a predictor.
predictors = dc.getColNames(train_frame)[1:]
kfoldAccuracy = metrics.measureKFoldAccuracy(train_frame, decision_tree, predictors,
                                             outputClass="Survived",
                                             outputClause="Survived", kFolds=10)

print("Max Cross Validation Score : ", cv_score.max(),
      "\nAverage Cross Validation Score : ", cv_score.mean(),
      "\nExtraTreeCLassifier Score : ", decision_tree.score(xTrain, yTrain),
      "\nTraining Accuracy : ", ta,
      # NOTE(review): fragment truncated here — the remaining print arguments and
      # the closing parenthesis lie outside this chunk.
#randomForest.fit(trainX, trainY) testX = testData[:, 1:] #resultsY = randomForest.predict(testX) """ Cross Validation """ # Cross Validation cvCount = 10 crossvalidation = cross_val_score(randomForest, trainX, trainY, cv=cvCount, scoring="accuracy") xTrain, xTest, yTrain, yTest = metric.traintestSplit(trainX, trainY) """ accuracyScores = [] depths = range(1, 26) for depth in depths: icvTree = xgb.XGBClassifier(max_depth=depth, n_estimators=100, nthread=4, seed=0) icvTree.fit(xTrain, yTrain) iyPreds = icvTree.predict(xTest) accuracyScores.append(metric.trainingAccuracy(yTest, iyPreds)) sns.plt.plot(depths, accuracyScores, alpha=0.7) sns.plt.xlabel("Depth Values") sns.plt.ylabel("Accuracy Scores") sns.plt.show() """ crossvalidationTree = xgb.XGBClassifier(n_estimators=100,
# -- Fragment: regressor grid-search script (flattened onto one physical line in
# -- SOURCE; reformatted here with all code tokens preserved).
# NOTE(review): begins mid-file — `dataclean`, `testFrame`, `trainData`,
# `ensemble`, `Metrics` and `GridSearchCV` are bound outside this chunk; confirm.
testData = dataclean.convertPandasDataFrameToNumpyArray(testFrame)
trainX = trainData[:, 1:]
trainY = trainData[:, 0]
testX = testData[:, 1:]

""" Cross Validation """
# Random forest later handed to GridSearchCV as the tuned estimator.
crossvalidationTree = ensemble.RandomForestRegressor(n_estimators=500, n_jobs=4, random_state=1)
cvCount = 10
# NOTE(review): the CV score is computed with a separate GradientBoostingRegressor,
# not the forest above — looks intentional, but worth confirming.
crossvalidation = Metrics.crossValidationScore(
    ensemble.GradientBoostingRegressor(random_state=1),
    trainX, trainY, cvCount=cvCount)
xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainY, randomState=1)

# NOTE(review): this `"""` has no matching close within the fragment; it appears
# to delimit the best-params note below, so everything after it may be dead text
# in the full file — confirm before editing.
"""
#{'n_estimators': 400, 'max_depth': 6, 'learning_rate': 0.01

if __name__ == "__main__":
    # Exhaustive grid over depth / tree count / learning rate with 10-fold CV.
    params = {"max_depth" : [3,4,5,6,7,8],
              "n_estimators" : [100, 200, 300, 400],
              "learning_rate" : [0.01, 0.05, 0.1, 0.2, 0.5, 1]}
    clf = GridSearchCV(crossvalidationTree, params, verbose=1, n_jobs=2, cv=10)
    clf.fit(trainX, trainY)
    print("GridSearch : \n", "Best Estimator : ", clf.best_estimator_,
          # NOTE(review): fragment truncated here — the remaining print arguments
          # and the closing parenthesis lie outside this chunk.
# -- Fragment: bike-sharing combined-XGBoost script (flattened onto one physical
# -- line in SOURCE; reformatted here with all code tokens preserved).
import csv
import BikeSharingDemand.Combine.Model as model
import MLScripts.Metrics as Metrics

# Clean the training set: columns 0-1 are the two targets [casual, registered],
# columns 3+ are features (column 2 is skipped — presumably the combined count
# column; confirm against the dataset schema).
trainFrame = model.cleanTrainset(model.loadTrainData())
trainData = model.convertPandasDataFrameToNumpyArray(trainFrame)
trainX = trainData[:, 3:]
trainYCasReg = trainData[:, 0:2] # [casual, registered]
xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainYCasReg)

#xgboost = model.selectXGBoost()
#xgboost2 = model.selectXGBoost()

# Ensemble of 8 project-selected XGBoost models wrapped in the project Combiner,
# fit against the two-column [casual, registered] target.
boostCount = 8
xgboosts = [model.selectXGBoost() for _ in range(boostCount)]
combinedRegressor = model.Combiner(xgboosts)
combinedRegressor.fit(xTrain, yTrain)
yPred = combinedRegressor.predict(xTest)

# True total demand = casual + registered, scored against the prediction by RMSLE.
y = []
for i, x in enumerate(yTest):
    y.append(x[0] + x[1])

rmsle = Metrics.rmsle2(y, yPred)
print("RMSLE Score : ", rmsle)

# NOTE(review): fragment truncated here — this `"""` opens a block whose content
# and closing quotes lie outside this chunk.
"""
# Model-comparison script: clean the data, fit four regressors on the full
# training set, and generate test-set predictions from each.
# NOTE(review): `dataclean`, `Metrics`, `xgb`, `ensemble` and `tree` are bound
# elsewhere in the surrounding file.

# --- Data preparation -------------------------------------------------------
trainFrame = dataclean.cleanDataset(dataclean.loadTrainData())
trainData = dataclean.convertPandasDataFrameToNumpyArray(trainFrame)
testFrame = dataclean.cleanDataset(dataclean.loadTestData(), True)
testData = dataclean.convertPandasDataFrameToNumpyArray(testFrame)

# Column 0 of the training array is the target; everything after it is a feature.
trainX, trainY = trainData[:, 1:], trainData[:, 0]
testX = testData[:, 1:]

""" Cross Validation """
# Hold-out split (unused by the code below; kept for parity with sibling scripts).
xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainY, randomState=1)

# --- Models (all seeded for reproducibility where the API allows) ------------
xgbtree = xgb.XGBRegressor(n_estimators=500, learning_rate=0.01, max_depth=10,
                           seed=1, nthread=4)
gbtree = ensemble.GradientBoostingRegressor(n_estimators=400, learning_rate=0.01,
                                            max_depth=6, random_state=1,
                                            presort=True)
randomforest = ensemble.RandomForestRegressor(n_estimators=500, n_jobs=4,
                                              random_state=1)
decisionTree = tree.DecisionTreeRegressor(presort=True)

# --- Fit on the full training data ------------------------------------------
xgbtree.fit(trainX, trainY)
gbtree.fit(trainX, trainY)
randomforest.fit(trainX, trainY)
decisionTree.fit(trainX, trainY)

# --- Predict on the test features -------------------------------------------
xgbPredict = xgbtree.predict(testX)
gbPredict = gbtree.predict(testX)
randomforestPredict = randomforest.predict(testX)
decisionTreePredict = decisionTree.predict(testX)
# -- Fragment: gradient-boosting cross-validation script (flattened onto one
# -- physical line in SOURCE; reformatted here with all code tokens preserved).
# NOTE(review): begins mid-string — the first line below is the tail of a
# commented-out (triple-quoted) block opened outside this chunk.
n_estimators=100, learning_rate=0.1, random_state=600)"""

# Features start at column 2; column 1 is the target (column 0 unused here —
# presumably an id column; confirm). `trainData`, `testData`, `randomForest`,
# `Metrics`, `metrics`, `ensemble` and `sns` are bound outside this chunk.
trainX = trainData[:, 2:]
trainY = trainData[:, 1]
#randomForest.fit(trainX, trainY)
testX = testData[:, 1:]
#resultsY = randomForest.predict(testX)

""" Cross Validation """
# Cross Validation
cvCount = 10
# 10-fold score of the (externally defined) random forest, plus a hold-out split.
crossvalidation = Metrics.crossValidationScore(randomForest, trainX, trainY, cvCount=cvCount)
xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainY, randomState=1)

# NOTE(review): this `"""` opens a commented-out learning-rate sweep (train one
# GradientBoostingClassifier per rate in 0.01..0.2 and plot hold-out accuracy);
# its closing quotes lie outside this chunk — the fragment is truncated below.
"""
accuracyScores = []
rngs = metrics.frange(0.01, 0.2)
for rng in rngs:
    icvTree = ensemble.GradientBoostingClassifier(n_estimators=100, learning_rate=rng,
                                                  max_depth=4, random_state=1,
                                                  warm_start=True)
    icvTree.fit(xTrain, yTrain)
    iyPreds = icvTree.predict(xTest)
    accuracyScores.append(metrics.trainingAccuracy(yTest, iyPreds))

sns.plt.plot(rngs, accuracyScores, alpha=0.7)