# Example no. 1
# 0
def Bagging(trainSet, iteration, method='DecisionStump', caseNum=0):
    """Train ``iteration`` base learners on bootstrap resamples of ``trainSet``.

    Each round draws ``caseNum`` rows from ``trainSet`` uniformly at random
    with replacement, fits the learner selected by ``method`` on that
    resample, and stores what the learner returns as one row of the result.

    Args:
        trainSet: 2-D array of shape (N, L). How its columns are interpreted
            is decided by the ``ML`` learners, not by this function.
        iteration: Number of bootstrap rounds (rows of the returned array).
        method: ``'LinearRegression'`` or ``'DecisionStump'``.
        caseNum: Number of rows drawn per resample; any value <= 0 means N.

    Returns:
        A float array with one row per round: shape ``(iteration, L)`` for
        ``'LinearRegression'`` and ``(iteration, 3)`` for ``'DecisionStump'``.

    Raises:
        ValueError: If ``method`` is not one of the supported names
            (previously this case silently returned ``None``).
    """
    # reading training data
    N, L = trainSet.shape

    # Width of one stored model: the regression learner's result is stored in
    # L slots, the stump learner's result in 3 slots.
    if method == 'LinearRegression':
        width = L
    elif method == 'DecisionStump':
        width = 3
    else:
        raise ValueError(
            "unsupported method %r; expected 'LinearRegression' or "
            "'DecisionStump'" % (method,)
        )

    # default caseNum is N
    if caseNum <= 0:
        caseNum = N

    # bag records g(it), the model fitted in each iteration
    bag = np.zeros((iteration, width), float)
    # dataSet holds the bootstrap resample; it has exactly caseNum rows so the
    # learner never sees unfilled (all-zero) rows when caseNum != N
    dataSet = np.zeros((caseNum, L), float)

    for it in range(iteration):
        print("iteration No.", it + 1)
        # bootstrap: indices are drawn from all N training rows (not from
        # range(caseNum)), with replacement
        idx = np.random.randint(N, size=caseNum)
        dataSet[:] = trainSet[idx, :]

        if method == 'LinearRegression':
            w = ML.LinearRegression(dataSet)
            bag[it, :] = w[:]
            # NOTE(review): the result was never used by the original code;
            # the call is kept in case the helper reports in-sample error.
            ML.testLinearRegression(dataSet, w)
        else:
            stump = ML.DecisionStump(dataSet)
            bag[it, :] = stump[:]
            # NOTE(review): result unused, call kept for the same reason.
            ML.testDecisionStump(dataSet, stump)

    return bag
# Example no. 2
# 0
"""
RUN

"""

import ML
import math
import time
import numpy as np

# Driver script: load the homework train/test CSV files through the project
# ML module, build a stump on the training data, then build an AdaBoost
# ensemble of stumps.

# Data sets are read with ML.csvRead; both are used below via `.shape`, so
# they are array-like with two dimensions.
train = ML.csvRead('hw_train.csv')
# Alternative input file (disabled):
#train=ML.csvRead('TrainData.csv')
test = ML.csvRead('hw_test.csv')
# Alternative input file (disabled):
#test=ML.csvRead('TestData.csv')
# N, L are the two dimensions of the training data; both names are rebound
# from the test data at the end of this section.
N, L = train.shape
#print('DecisionStump')
# Build one stump from the training data and run the matching test helper on
# the test data. What `y` contains is defined by ML.testDecisionStump.
stump = ML.DecisionStump(train)
y = ML.testDecisionStump(test, stump)

# Timing instrumentation (disabled):
#tic=time.time()

# Alternative ensemble builders (disabled):
#bag=ML.Bagging(train,30,'LinearRegression',)
#bag=ML.Bagging(train,30)
# NOTE(review): 1000 is presumably the number of boosting rounds and `aT` the
# per-round weights; confirm against ML.AdaBoost_Stump.
bag, aT = ML.AdaBoost_Stump(train, 1000)

#toc=time.time()

#print('Elapsed time is',toc-tic,'second')

# out-sample testing
# From here on N and L describe the test set, not the training set.
N, L = test.shape