Example #1
def test_dataset4():
    ''' (2 points) test dataset4'''
    n = 400
    X, Y = RF.load_dataset()

    assert X.shape == (16,400)
    assert Y.shape == (400,)
    d = DT()
    # train over half of the dataset
    t = d.train(X[:,::2],Y[::2]) 
    # test on the other half
    Y_predict = DT.predict(t,X[:,1::2]) 
    accuracy0 = sum(Y[1::2]==Y_predict)/float(n)*2. 
    print('test accuracy of a decision tree:', accuracy0)

    b = Bag()
    # train over half of the dataset
    T = b.train(X[:,::2],Y[::2],21) 
    # test on the other half
    Y_predict = Bag.predict(T,X[:,1::2]) 
    accuracy1 = sum(Y[1::2]==Y_predict)/float(n)*2. 
    print('test accuracy of a bagging of 21 trees:', accuracy1)

    r = RF()
    # train over half of the dataset
    T = r.train(X[:,::2],Y[::2],21) 
    # test on the other half
    Y_predict = RF.predict(T,X[:,1::2]) 
    accuracy2 = sum(Y[1::2]==Y_predict)/float(n)*2. 
    print('test accuracy of a random forest of 21 trees:', accuracy2)
    assert accuracy1 >= accuracy0
    assert accuracy2 >= accuracy0
    assert accuracy2 >= accuracy1-.05
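
The test above interleaves the data: even-indexed instances (X[:, ::2], Y[::2]) form the training half and odd-indexed instances (X[:, 1::2], Y[1::2]) form the test half, with accuracy computed as the number of matches divided by n/2. Below is a minimal self-contained sketch of that split and accuracy computation on synthetic data; the array values and the stand-in predictor are illustrative only and not part of the original test.

import numpy as np

# Synthetic feature matrix: p = 3 attributes, n = 6 instances (one instance per column).
X = np.arange(18).reshape(3, 6)
Y = np.array([0, 1, 0, 1, 0, 1])

X_train, Y_train = X[:, ::2], Y[::2]     # even-indexed instances -> training half
X_test,  Y_test  = X[:, 1::2], Y[1::2]   # odd-indexed instances  -> testing half

assert X_train.shape == (3, 3) and X_test.shape == (3, 3)

# Accuracy computed the same way as in the test: matches divided by n/2.
Y_predict = Y_test.copy()                # stand-in for a model's predictions
accuracy = np.sum(Y_test == Y_predict) / float(len(Y)) * 2.
print(accuracy)                          # 1.0 for this perfect stand-in predictor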
Example #2
    def step(X, Y, D):
        '''
            Compute one step of Boosting.  
            Input:
                X: the feature matrix, a numpy matrix of shape p by n. 
                   Each element can be int/float/string.
               Here n is the number of data instances in the node, and p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the current weights of instances, a numpy float vector of length n
            Output:
                t:  the root node of a decision stump trained in this step
                a: (alpha) the weight of the decision stump, a float scalar.
                D: the new weights of instances, a numpy float vector of length n
        '''
        #########################################
        ## INSERT YOUR CODE HERE
        t = DS().build_tree(X, Y, D)
        Y_ = DT.predict(t, X)
        e = AB.weighted_error_rate(Y, Y_, D)
        a = AB.compute_alpha(e)
        D = AB.update_D(D, a, Y, Y_)

        #########################################
        return t, a, D
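
For context, the helper calls in step follow the standard AdaBoost recipe: fit a stump on the current weights, compute its weighted error, derive its vote weight alpha, and reweight the instances. Below is a minimal sketch of what AB.weighted_error_rate, AB.compute_alpha, and AB.update_D typically compute under the standard AdaBoost formulation; the project's own AB class is not shown here, so the exact signatures and details are assumptions.

import numpy as np

def weighted_error_rate(Y, Y_, D):
    # Weighted misclassification rate: e = sum_i D_i * 1[Y_i != Y^_i]
    return float(np.sum(D * (Y != Y_)))

def compute_alpha(e, eps=1e-10):
    # Stump weight: alpha = 0.5 * ln((1 - e) / e); e is clipped to avoid division by zero.
    e = np.clip(e, eps, 1. - eps)
    return 0.5 * np.log((1. - e) / e)

def update_D(D, a, Y, Y_):
    # Reweight instances: multiply by exp(-alpha) when correct, exp(+alpha) when wrong,
    # then renormalize so the weights sum to one.
    D_new = D * np.exp(np.where(Y == Y_, -a, a))
    return D_new / np.sum(D_new)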