def test_dataset4():
    ''' (2 points) test dataset4'''
    n = 400
    X, Y = RF.load_dataset()
    assert X.shape == (16, 400)
    assert Y.shape == (400,)

    # Even-indexed columns form the training split, odd-indexed the test split.
    X_train, Y_train = X[:, ::2], Y[::2]
    X_test, Y_test = X[:, 1::2], Y[1::2]

    def _accuracy(Y_hat):
        # fraction of correct predictions on the held-out half (n/2 samples)
        return sum(Y_test == Y_hat) / float(n) * 2.

    # single decision tree: train on one half, evaluate on the other
    tree = DT().train(X_train, Y_train)
    accuracy0 = _accuracy(DT.predict(tree, X_test))
    print('test accuracy of a decision tree:', accuracy0)

    # bagging ensemble of 21 trees
    bag_trees = Bag().train(X_train, Y_train, 21)
    accuracy1 = _accuracy(Bag.predict(bag_trees, X_test))
    print('test accuracy of a bagging of 21 trees:', accuracy1)

    # random forest of 21 trees
    forest = RF().train(X_train, Y_train, 21)
    accuracy2 = _accuracy(RF.predict(forest, X_test))
    print('test accuracy of a random forest of 21 trees:', accuracy2)

    # ensembles should not underperform a single tree (forest allowed a
    # small slack versus bagging)
    assert accuracy1 >= accuracy0
    assert accuracy2 >= accuracy0
    assert accuracy2 >= accuracy1 - .05
def step(X, Y, D):
    ''' Compute one step of Boosting.
        Input:
            X: the feature matrix, a numpy matrix of shape p by n.
               Each element can be int/float/string.
               Here n is the number data instances in the node,
               p is the number of attributes.
            Y: the class labels, a numpy array of length n.
               Each element can be int/float/string.
            D: the current weights of instances, a numpy float vector of length n
        Output:
            t: the root node of a decision stump trained in this step
            a: (alpha) the weight of the decision stump, a float scalar.
            D: the new weights of instances, a numpy float vector of length n
    '''
    #########################################
    ## INSERT YOUR CODE HERE

    # Fit a weighted decision stump on the current instance weights.
    stump = DS().build_tree(X, Y, D)
    # Predictions of the freshly trained stump on the training data.
    Y_hat = DT.predict(stump, X)
    # Stump weight (alpha) derived from its weighted training error.
    alpha = AB.compute_alpha(AB.weighted_error_rate(Y, Y_hat, D))
    # Re-weight instances: misclassified samples gain weight.
    D_next = AB.update_D(D, alpha, Y, Y_hat)

    #########################################
    return stump, alpha, D_next