Esempio n. 1
0
def experiment(train_file, test_file, predictor, ntrees, mtry, sample, ignores, parser_header, cpu_cores):
    """Fit a WiseRF on ``train_file``, evaluate it on ``test_file``, print a report.

    train_file, test_file -- CSV paths read with pandas (``header=0``).
    predictor   -- positional index of the target column in each frame.
    ntrees      -- forwarded to WiseRF as ``n_estimators``.
    ignores     -- handed to ``popIgnores`` together with each frame.
    cpu_cores   -- forwarded to WiseRF as ``n_jobs``.
    mtry, sample, parser_header -- accepted but not used by this function.

    Nothing is returned: the predictions, both timings and the error figures
    are printed. The error figures are ``1 - rf.score(...)`` (assumes ``score``
    returns the fraction of correct predictions -- TODO confirm).
    """
    # ---- training set ----
    train_frame = pandas.read_csv(train_file, header=0)
    # The value returned here is passed back in when preprocessing the test frame.
    fmap = preprocessFrame(train_frame)
    train_labels = array(train_frame.pop(train_frame.columns[predictor]))
    train_features = array(popIgnores(train_frame, ignores))

    # Model construction is timed together with fitting.
    fit_started = time()
    forest = WiseRF(n_estimators=ntrees, n_jobs=cpu_cores)
    forest.fit(train_features, train_labels)
    fit_seconds = time() - fit_started

    # ---- test set ----
    test_frame = pandas.read_csv(test_file, header=0)
    preprocessFrame(test_frame, fmap)
    test_labels = array(test_frame.pop(test_frame.columns[predictor]))
    test_features = array(popIgnores(test_frame, ignores))

    # The timed window covers predict, printing the predictions, and score.
    eval_started = time()
    predictions = forest.predict(test_features)
    print(predictions)
    score = forest.score(test_features, test_labels)
    eval_seconds = time() - eval_started

    # Human-readable summary followed by a CSV header and one CSV row.
    report = """
Train time: {0}
 Test time: {1}
 Err. rate: {2} %
Trees,Sample,Mtry,TrainTime,TestTime,OOB,ClassErr
{3},NA,NA,{0},{1},NA,{4}""".format(fit_seconds, eval_seconds, 100*(1-score), ntrees, 1-score)
    print(report)
Esempio n. 2
0
def experiment(train_file, test_file, predictor, ntrees, mtry, sample, ignores,
               parser_header, cpu_cores):
    """Train a WiseRF from one CSV, test it on another, and print the results.

    Both CSVs are read with pandas using the first row as header. The column
    at position ``predictor`` is removed from each frame and used as the
    target; what remains goes through ``popIgnores(frame, ignores)`` and is
    converted with ``array``. ``ntrees`` and ``cpu_cores`` become the
    ``n_estimators`` and ``n_jobs`` arguments of WiseRF. ``mtry``, ``sample``
    and ``parser_header`` are part of the signature but are not read here.

    Prints the predictions, then a summary with train/test times and the
    error rate plus a CSV-style header and row. Returns nothing.
    """
    # Training data: preprocess first, then split off the target column.
    frame = pandas.read_csv(train_file, header=0)
    fmap = preprocessFrame(frame)
    target_name = frame.columns[predictor]
    y_train = array(frame.pop(target_name))
    x_train = array(popIgnores(frame, ignores))

    # Time building and fitting the forest as a single step.
    t0 = time()
    model = WiseRF(n_estimators=ntrees, n_jobs=cpu_cores)
    model.fit(x_train, y_train)
    train_elapsed = time() - t0

    # Test data: preprocessed with the value obtained from the training frame.
    frame = pandas.read_csv(test_file, header=0)
    preprocessFrame(frame, fmap)
    target_name = frame.columns[predictor]
    y_test = array(frame.pop(target_name))
    x_test = array(popIgnores(frame, ignores))

    # Time prediction, the printout of the predictions, and scoring together.
    t0 = time()
    predicted = model.predict(x_test)
    print(predicted)
    accuracy = model.score(x_test, y_test)
    test_elapsed = time() - t0

    # NOTE(review): treats 1 - score as the error fraction; confirm that
    # WiseRF.score returns accuracy.
    template = """
Train time: {0}
 Test time: {1}
 Err. rate: {2} %
Trees,Sample,Mtry,TrainTime,TestTime,OOB,ClassErr
{3},NA,NA,{0},{1},NA,{4}"""
    print(template.format(train_elapsed, test_elapsed,
                          100 * (1 - accuracy), ntrees,
                          1 - accuracy))
Esempio n. 3
0
    X = np.concatenate([X] +
                       [np.apply_along_axis(rotate, 1, X, angle)
                        for angle in angles])
    Y = np.concatenate([Y for _ in range(3)], axis=0)
    return X, Y

# Load both CSVs; the trailing [1:] drops the first parsed row of each file
# (presumably the header line, which genfromtxt cannot read as numbers -- confirm).
train = np.genfromtxt('train.csv', delimiter=',')[1:]
target = train[:,0]   # first column holds the target values
train = train[:,1:]   # remaining columns are the features
test = np.genfromtxt('test.csv', delimiter=',')[1:]

#%% Rotates and nudges dataset, trains predictor
ntrain,ntarget = rotate_dataset(train,target)
ntrain,ntarget = nudge_dataset(ntrain,ntarget)

wtrees = WiseRF(n_jobs=-1,n_estimators=512)
wtrees.fit(ntrain,ntarget)
# The return value is discarded here; kept so the script's calls stay unchanged.
wtrees.score(ntrain,ntarget)
result_svm_rbm = wtrees.predict(test)

#%%

# Write one "ImageId,Label" row per prediction, with ids counted from 1.
# The with-block guarantees the file is flushed and closed even if a write
# fails; the original never closed the handle.
with open('result_wtrees.csv','w') as f:
    f.write('ImageId,Label\n')

    count=1

    for x in result_svm_rbm:
        f.write('%d,%d\n' % (count,x))
        count += 1