def experiment(train_file, test_file, predictor, ntrees, mtry, sample,
               ignores, parser_header, cpu_cores):
    """Fit a WiseRF model on one CSV file, score it on another, print a report.

    Both files are read with ``pandas.read_csv(..., header=0)`` and passed
    through ``preprocessFrame``; the mapping returned for the training frame
    is handed back in when the test frame is preprocessed.

    Args:
        train_file: Path or buffer given to ``pandas.read_csv`` for training.
        test_file: Path or buffer given to ``pandas.read_csv`` for testing.
        predictor: Positional index into ``DataFrame.columns`` selecting the
            target column, which is popped from the frame before fitting.
        ntrees: Passed to ``WiseRF`` as ``n_estimators``; also echoed in the
            report.
        mtry: Not used by this function.
        sample: Not used by this function.
        ignores: Forwarded to ``popIgnores`` (presumably the columns to
            drop -- confirm against that helper).
        parser_header: Not used by this function.
        cpu_cores: Passed to ``WiseRF`` as ``n_jobs``.

    Returns:
        None. The predictions and a timing / error report go to stdout.
    """
    # Load train data
    trainDataX = pandas.read_csv(train_file, header=0)
    fmap = preprocessFrame(trainDataX)
    predictorCol = trainDataX.columns[predictor]
    # The target is popped first, so popIgnores only sees the remaining columns.
    trainDataY = array(trainDataX.pop(predictorCol))
    trainDataX = array(popIgnores(trainDataX, ignores))

    # Train time covers model construction plus fit.
    timeTrainRF = time()
    rf = WiseRF(n_estimators=ntrees, n_jobs=cpu_cores)
    rf.fit(trainDataX, trainDataY)
    timeTrainRF = time() - timeTrainRF

    # Validation
    testDataX = pandas.read_csv(test_file, header=0)
    # Return value is ignored here; presumably the frame is modified in
    # place -- confirm against preprocessFrame.
    preprocessFrame(testDataX, fmap)
    predictorCol = testDataX.columns[predictor]
    testDataY = array(testDataX.pop(predictorCol))
    testDataX = array(popIgnores(testDataX, ignores))

    # Test time covers predict(), the print of the predictions, and score().
    timeTestRF = time()
    predict = rf.predict(testDataX)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(predict)
    testScore = rf.score(testDataX, testDataY)
    timeTestRF = time() - timeTestRF

    # NOTE(review): the template text is kept exactly as it appears in the
    # reviewed source; confirm whether line breaks between fields are intended.
    report = (
        " Train time: {0} Test time: {1} Err. rate: {2} % "
        "Trees,Sample,Mtry,TrainTime,TestTime,OOB,ClassErr "
        "{3},NA,NA,{0},{1},NA,{4}"
    )
    # The error rate is reported as 1 - score (assumes score() returns an
    # accuracy fraction -- confirm against WiseRF).
    print(report.format(timeTrainRF, timeTestRF, 100 * (1 - testScore),
                        ntrees, 1 - testScore))
def experiment(train_file, test_file, predictor, ntrees, mtry, sample,
               ignores, parser_header, cpu_cores):
    """Fit a WiseRF model on one CSV file, score it on another, print a report.

    Both files are read with ``pandas.read_csv(..., header=0)`` and passed
    through ``preprocessFrame``; the mapping returned for the training frame
    is handed back in when the test frame is preprocessed.

    Args:
        train_file: Path or buffer given to ``pandas.read_csv`` for training.
        test_file: Path or buffer given to ``pandas.read_csv`` for testing.
        predictor: Positional index into ``DataFrame.columns`` selecting the
            target column, which is popped from the frame before fitting.
        ntrees: Passed to ``WiseRF`` as ``n_estimators``; also echoed in the
            report.
        mtry: Not used by this function.
        sample: Not used by this function.
        ignores: Forwarded to ``popIgnores`` (presumably the columns to
            drop -- confirm against that helper).
        parser_header: Not used by this function.
        cpu_cores: Passed to ``WiseRF`` as ``n_jobs``.

    Returns:
        None. The predictions and a timing / error report go to stdout.
    """
    # Load train data
    trainDataX = pandas.read_csv(train_file, header=0)
    fmap = preprocessFrame(trainDataX)
    predictorCol = trainDataX.columns[predictor]
    # The target is popped first, so popIgnores only sees the remaining columns.
    trainDataY = array(trainDataX.pop(predictorCol))
    trainDataX = array(popIgnores(trainDataX, ignores))

    # Train time covers model construction plus fit.
    timeTrainRF = time()
    rf = WiseRF(n_estimators=ntrees, n_jobs=cpu_cores)
    rf.fit(trainDataX, trainDataY)
    timeTrainRF = time() - timeTrainRF

    # Validation
    testDataX = pandas.read_csv(test_file, header=0)
    # Return value is ignored here; presumably the frame is modified in
    # place -- confirm against preprocessFrame.
    preprocessFrame(testDataX, fmap)
    predictorCol = testDataX.columns[predictor]
    testDataY = array(testDataX.pop(predictorCol))
    testDataX = array(popIgnores(testDataX, ignores))

    # Test time covers predict(), the print of the predictions, and score().
    timeTestRF = time()
    predict = rf.predict(testDataX)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(predict)
    testScore = rf.score(testDataX, testDataY)
    timeTestRF = time() - timeTestRF

    # NOTE(review): the template text is kept exactly as it appears in the
    # reviewed source; confirm whether line breaks between fields are intended.
    report = (
        " Train time: {0} Test time: {1} Err. rate: {2} % "
        "Trees,Sample,Mtry,TrainTime,TestTime,OOB,ClassErr "
        "{3},NA,NA,{0},{1},NA,{4}"
    )
    # The error rate is reported as 1 - score (assumes score() returns an
    # accuracy fraction -- confirm against WiseRF).
    print(report.format(timeTrainRF, timeTestRF, 100 * (1 - testScore),
                        ntrees, 1 - testScore))
X = np.concatenate([X] + [np.apply_along_axis(rotate, 1, X, angle) for angle in angles]) Y = np.concatenate([Y for _ in range(3)], axis=0) return X, Y train = np.genfromtxt('train.csv', delimiter=',')[1:] target = train[:,0] train = train[:,1:] test = np.genfromtxt('test.csv', delimiter=',')[1:] #%% Rotates and nudges dataset, trains predictor ntrain,ntarget = rotate_dataset(train,target) ntrain,ntarget = nudge_dataset(ntrain,ntarget) wtrees = WiseRF(n_jobs=-1,n_estimators=512) wtrees.fit(ntrain,ntarget) wtrees.score(ntrain,ntarget) result_svm_rbm = wtrees.predict(test) #%% f=open('result_wtrees.csv','w') f.write('ImageId,Label\n') count=1 for x in result_svm_rbm: f.write('%d,%d\n' % (count,x)) count += 1