X = X[idx]
y = y[idx]
# split the data
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# train the decision tree
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)

#print ypred_DT

# train the boosted DT and, for comparison, sklearn's AdaBoostClassifier
modelBoostedDT = BoostedDT(numBoostingIters=100, maxTreeDepth=3)
model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=2),
                           n_estimators=100,
                           random_state=13)
# estimate the AdaBoost baseline with 2-fold cross-validation on the full data
# (shuffle=True so that random_state=13 actually has an effect)
kfold = cross_validation.KFold(n=n, n_folds=2, shuffle=True, random_state=13)
results = cross_validation.cross_val_score(model, X, y, cv=kfold)
modelBoostedDT.fit(Xtrain, ytrain)
model.fit(Xtrain, ytrain)
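# results holds one accuracy per fold; a quick summary of the AdaBoost baseline
# (an illustrative print added here, not part of the original run)
print "AdaBoost 2-fold CV accuracy: %.3f (+/- %.3f)" % (results.mean(), results.std())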
# train an RBF-kernel SVM (the SVC default) as an additional baseline
clf = SVC()
clf.fit(Xtrain, ytrain)
y_pred_rbf1 = clf.predict(Xtest)
scores = list()
scores_rbf = list()
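# y_pred_rbf1 is never scored in this excerpt; one way to evaluate the SVM
# baseline (assuming accuracy_score from sklearn.metrics is imported, as in
# the second example below) would be:
accuracy_rbf = accuracy_score(ytest, y_pred_rbf1)
scores_rbf.append(accuracy_rbf)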

# fit a larger boosted DT on all of the data
k_model = BoostedDT(numBoostingIters=140, maxTreeDepth=5)
k_model.fit(X, y)
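# The BoostedDT class used throughout this file is project-specific and its
# implementation is not shown here. Below is a minimal sketch of what such a
# class could look like, assuming a multiclass AdaBoost (SAMME) built on
# sklearn decision trees; the internals are an illustration of the technique,
# not the actual implementation behind the calls above.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

class BoostedDT(object):
    def __init__(self, numBoostingIters=100, maxTreeDepth=3):
        self.numBoostingIters = numBoostingIters
        self.maxTreeDepth = maxTreeDepth
        self.models = []
        self.betas = []
        self.classes = None

    def fit(self, X, y):
        n = X.shape[0]
        self.classes = np.unique(y)
        K = len(self.classes)
        w = np.ones(n) / n                      # start with uniform instance weights
        for _ in range(self.numBoostingIters):
            tree = DecisionTreeClassifier(max_depth=self.maxTreeDepth)
            tree.fit(X, y, sample_weight=w)
            pred = tree.predict(X)
            err = np.sum(w * (pred != y)) / np.sum(w)
            if err >= 1.0 - 1.0 / K:            # no better than chance: stop boosting
                break
            beta = np.log((1.0 - err) / max(err, 1e-10)) + np.log(K - 1.0)
            w *= np.exp(beta * (pred != y))     # upweight misclassified points
            w /= np.sum(w)
            self.models.append(tree)
            self.betas.append(beta)

    def predict(self, X):
        # weighted vote of the boosted trees
        votes = np.zeros((X.shape[0], len(self.classes)))
        for tree, beta in zip(self.models, self.betas):
            pred = tree.predict(X)
            for k, c in enumerate(self.classes):
                votes[pred == c, k] += beta
        return self.classes[np.argmax(votes, axis=1)]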
Example #2
np.random.seed(13)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# split the data
Xtrain = X[:nTrain,:]
ytrain = y[:nTrain]
Xtest = X[nTrain:,:]
ytest = y[nTrain:]
print ytest
# print Xtrain
# train the decision tree
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain,ytrain)

# train the boosted DT
modelBoostedDT = BoostedDT(numBoostingIters=100, maxTreeDepth=2)
modelBoostedDT.fit(Xtrain,ytrain)

# output predictions on the remaining data
ypred_DT = modelDT.predict(Xtest)
ypred_BoostedDT = modelBoostedDT.predict(Xtest)

# compute the test accuracy of each model
accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyBoostedDT = accuracy_score(ytest, ypred_BoostedDT)
# print the boosted DT predictions as a comma-separated list of integer labels
print ','.join(str(e) for e in ypred_BoostedDT.astype(int))

print "Decision Tree Accuracy = "+str(accuracyDT)
print "Boosted Decision Tree Accuracy = "+str(accuracyBoostedDT)
n,d = Xdata.shape
nTrain = int(0.5*n)  # cast to int so it can be used as a slice index

idx = np.arange(n)
np.random.seed(22)
np.random.shuffle(idx)
Xdata = Xdata[idx]
ydata = ydata[idx]

boost_iter_list = []
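# Grid search over numBoostingIters x maxTreeDepth: each combination is scored
# with 10-fold cross-validation and the mean test accuracy is stored, producing
# the 3x3 grid shown in the output block below.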
for i in [100,1000,10000]:
    depth_list = []
    for j in [1,2,3]:
        test_accuracy_list = []
        modelBoostedDT = BoostedDT(numBoostingIters=i, maxTreeDepth=j)
        kf = KFold(n, n_folds=10)  # use the dataset size rather than a hard-coded 2000
        for train_index, test_index in kf:
            Xtrain, Xtest = Xdata[train_index], Xdata[test_index]
            ytrain, ytest = ydata[train_index], ydata[test_index]
            modelBoostedDT.fit(Xtrain,ytrain) 
            test_ypred_BoostedDT = modelBoostedDT.predict(Xtest)
            test_accuracyBoostedDT = accuracy_score(ytest, test_ypred_BoostedDT)
            test_accuracy_list.append(test_accuracyBoostedDT)
        depth_list.append(np.mean(test_accuracy_list))
    boost_iter_list.append(depth_list)
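# An illustrative follow-up (not part of the original script): boost_iter_list
# is indexed [boosting-iteration setting][tree-depth setting], so the best
# combination found by the grid search can be read off with argmax.
acc_grid = np.array(boost_iter_list)
best_i, best_j = np.unravel_index(np.argmax(acc_grid), acc_grid.shape)
print "best numBoostingIters = %d, maxTreeDepth = %d" % ([100, 1000, 10000][best_i], [1, 2, 3][best_j])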
'''
output (rows: numBoostingIters = 100, 1000, 10000; columns: maxTreeDepth = 1, 2, 3)
[[0.031000000000000007, 0.016499999999999997, 0.017000000000000005],
 [0.027000000000000003, 0.019000000000000003, 0.013500000000000002],
 [0.026500000000000003, 0.020500000000000001, 0.013000000000000001]]