Esempio n. 1
0
import pandas as pd
import time

from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

# Stopwatch for the data-loading phase; it is reset after every reported phase.
start_time = time.time()

# Training inputs: caffe features and their labels.
df_trainset_caf = Input.load_trainset_caffefeatures()
df_trainset_lab = Input.load_trainset_labels()

# Validation features, used further down as the test input.
df_validationset_caf = Input.load_validationset_caffefeatures()

elapsed = round(time.time() - start_time, 2)
print("--- load data: %s seconds ---" % elapsed)
start_time = time.time()

# Shorter aliases for the model code; no copy or transformation is made here.
x_train, y_train, x_test = (
    df_trainset_caf,
    df_trainset_lab,
    df_validationset_caf,
)

# Fit a 500-tree random forest; fit() hands back the fitted estimator itself.
rf = RandomForestClassifier(n_estimators=500).fit(x_train, y_train)

elapsed = round(time.time() - start_time, 2)
print("--- train model: %s seconds ---" % elapsed)
start_time = time.time()

# Predict
'''Simple test file to test whether loading caffefeatures works properly. Selecting percentiles, selecting rows and giving error messages.
@author: Diede Kemper'''

from IO import Input

# Manual smoke test: each step loads features through Input, prints the
# resulting shape and then prints what the author expects to see, so the two
# lines can be compared by eye.
#
# print is called with a single parenthesised argument everywhere so the
# script parses on Python 3 and still prints the same text on Python 2.

# 1. Validation set with default arguments.
features = Input.load_validationset_caffefeatures()
print(features.shape)
print('should be: 8061x3983')

# 2. Row subset of the training data.
# list() keeps `userows` a real list on Python 3, as range() produced on
# Python 2.
features = Input.load_traindata_caffefeatures(
    userows=list(range(3000, 5500)))
print(features.shape)
print('should be: 2500x3983')

# 3. chi2 selection at the 100th percentile; the expected shape is the same
# as in step 1.
features = Input.load_validationset_caffefeatures(
    featureSelectionMethod='chi2', Percentile=100)
print(features.shape)
print('should be: 8061x3983')

# 4. Presumably 'hoi' is not a recognised selection method; the loader is
# expected to report an error.
# NOTE(review): if the loader returns None on error, the .shape access below
# will raise; confirm against Input.
features = Input.load_validationset_caffefeatures(
    featureSelectionMethod='hoi', Percentile=90)
print(features.shape)
print('should print error message')

# 5. Presumably Percentile=210 is out of range; an error is expected again.
features = Input.load_validationset_caffefeatures(
    featureSelectionMethod='chi2', Percentile=210)
print(features.shape)
print('should print error message')

# 6. chi2 selection keeping the 5th percentile of the training data; the
# script states no expected shape for this case.
features = Input.load_traindata_caffefeatures(
    featureSelectionMethod='chi2', Percentile=5)
print(features.shape)