def Bagging(trainSet, iteration, method='DecisionStump', caseNum=0):
    """Train `iteration` base learners on bootstrap resamples of `trainSet`.

    Each round draws `caseNum` rows from `trainSet` uniformly at random with
    replacement, fits the selected base learner on that resample, and stores
    the fitted parameters as one row of the returned array.

    Args:
        trainSet: 2-D array-like exposing `.shape` and `[rows, :]` indexing;
            passed (resampled) to the `ML` learner unchanged in layout.
        iteration: number of bootstrap rounds, i.e. rows in the result.
        method: 'DecisionStump' (default) or 'LinearRegression'.
        caseNum: number of rows per bootstrap sample. Values <= 0 select
            the full training-set size N.

    Returns:
        A float array. For 'LinearRegression' it has shape (iteration, L),
        where L is the column count of `trainSet`, and each row holds what
        `ML.LinearRegression` returned. For 'DecisionStump' it has shape
        (iteration, 3) and each row holds what `ML.DecisionStump` returned.

    Raises:
        ValueError: if `method` is not one of the supported names.
    """
    N, L = trainSet.shape
    if caseNum <= 0:
        caseNum = N

    # Width of one stored model; reject unknown learners up front instead of
    # silently returning None.
    if method == 'LinearRegression':
        width = L
    elif method == 'DecisionStump':
        width = 3
    else:
        raise ValueError(
            "unknown method %r; expected 'DecisionStump' or "
            "'LinearRegression'" % (method,))

    # bag[it] records the model g(it) fitted in round `it`.
    bag = np.zeros((iteration, width), float)
    # Resampling buffer. It has exactly caseNum rows so that no stale or
    # all-zero rows reach the learner when caseNum differs from N.
    dataSet = np.zeros((caseNum, L), float)

    for it in range(iteration):
        print("iteration No.", it + 1)
        # Bootstrap: indices must range over all N training rows. Drawing
        # them from range(caseNum) would restrict sampling to the first
        # caseNum rows and overrun trainSet whenever caseNum > N.
        rows = np.random.randint(N, size=caseNum)
        dataSet[:, :] = trainSet[rows, :]

        if method == 'LinearRegression':
            model = ML.LinearRegression(dataSet)
            # Return value is not used here; the call is retained in case
            # the helper reports in-sample error itself -- TODO confirm.
            ML.testLinearRegression(dataSet, model)
        else:
            model = ML.DecisionStump(dataSet)
            # Same as above: result intentionally discarded.
            ML.testDecisionStump(dataSet, model)
        bag[it, :] = model[:]

    return bag
RUN """
# NOTE(review): the text above appears to be the tail of a triple-quoted
# string opened before this chunk -- confirm against the full file.
import ML
# NOTE(review): `math` is not used in the visible part of this script, and
# `time` is only referenced by the commented-out timing lines below.
import math
import time
import numpy as np

# Load the training and test sets. The commented-out lines are alternative
# input files.
train = ML.csvRead('hw_train.csv')
#train=ML.csvRead('TrainData.csv')
test = ML.csvRead('hw_test.csv')
#test=ML.csvRead('TestData.csv')
# Row and column counts of the training set.
N, L = train.shape

# Baseline: fit one decision stump on the training set and run it on the
# test set.
#print('DecisionStump')
stump = ML.DecisionStump(train)
y = ML.testDecisionStump(test, stump)

# Ensemble training. The Bagging variants are disabled; AdaBoost with stumps
# is the active choice.
# NOTE(review): 1000 is presumably the number of boosting rounds, and `aT`
# presumably holds per-round weights -- confirm in ML.AdaBoost_Stump.
#tic=time.time()
#bag=ML.Bagging(train,30,'LinearRegression',)
#bag=ML.Bagging(train,30)
bag, aT = ML.AdaBoost_Stump(train, 1000)
#toc=time.time()
#print('Elapsed time is',toc-tic,'second')

# out-sample testing
# N and L are rebound here to the test-set dimensions, replacing the
# training-set values assigned above.
N, L = test.shape