def select_indices():
    """Save the indices of features that vary in both train and test data.

    Loads the padded caffe features of the test and the train data, finds
    the columns with non-zero variance in each set, keeps only the columns
    found in both, adds 1 to every index and writes the result as a single
    row to 'caffefeature_indices_padded.csv' in the working directory.

    The indices are written in ascending order so that repeated runs
    produce the same file.
    """
    print('reading in features')
    test_features = Input.load_testdata_caffefeatures(padded=True)
    train_features = Input.load_traindata_caffefeatures(padded=True)

    print('selecting indices')
    # A default VarianceThreshold keeps every feature with non-zero
    # variance. Only the fitted support mask is needed, so fit() is used
    # instead of fit_transform() to avoid building a reduced copy of the
    # data that would be discarded right away.
    indices_test = VarianceThreshold().fit(test_features).get_support(
        indices=True)
    indices_train = VarianceThreshold().fit(train_features).get_support(
        indices=True)

    # only keep indices that have variance in both test and train data;
    # sorted, because set iteration order is not guaranteed
    shared = sorted(set(indices_test) & set(indices_train))

    # add 1 to all indices
    # NOTE(review): presumably compensates for a leading non-feature column
    # in the stored files -- confirm against the loader.
    indices = [x + 1 for x in shared]

    # save indices to csv file; the context manager guarantees the file is
    # flushed and closed even if writing fails. Binary mode is what the
    # Python 2 csv module expects.
    with open('caffefeature_indices_padded.csv', 'wb') as myfile:
        csv.writer(myfile).writerow(indices)
'''Simple test file to test whether loading caffefeatures works properly. Selecting percentiles, selecting rows and giving error messages.
@author: Diede Kemper'''

from IO import Input

# Case 1: validation set loaded with default arguments.
features = Input.load_validationset_caffefeatures()
print(features.shape)
print('should be: 8061x3983')

# Case 2: a 2500-row slice of the train data.
selected_rows = range(3000, 5500)
features = Input.load_traindata_caffefeatures(userows=selected_rows)
print(features.shape)
print('should be: 2500x3983')

# Case 3: chi2 selection at the 100th percentile.
features = Input.load_validationset_caffefeatures(featureSelectionMethod='chi2',
                                                  Percentile=100)
print(features.shape)
print('should be: 8061x3983')

# Case 4: a feature selection method name the script expects to be rejected.
features = Input.load_validationset_caffefeatures(
    featureSelectionMethod='hoi', Percentile=90)
print(features.shape)
print('should print error message')

# Case 5: a percentile value the script expects to be rejected.
features = Input.load_validationset_caffefeatures(featureSelectionMethod='chi2',
                                                  Percentile=210)
print(features.shape)
print('should print error message')

# Case 6: chi2 selection at the 5th percentile of the train data; no
# expected shape is stated for this one.
features = Input.load_traindata_caffefeatures(
    featureSelectionMethod='chi2', Percentile=5)
print(features.shape)
# Example #3
# 0
import pandas as pd
import time

from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

# Reference point for the timing of the data-loading stage.
start_time = time.time()

# Load the caffe features and the labels of the train data.
# NOTE(review): the df_ prefix suggests pandas DataFrames -- confirm against
# the IO.Input loaders.
df_traindata_caf = Input.load_traindata_caffefeatures()
df_traindata_lab = Input.load_traindata_labels()

# Load the caffe features of the test data.
df_testdata_caf = Input.load_testdata_caffefeatures()

# Report how long loading took (rounded to 2 decimals), then restart the
# timer for the next stage.
print("--- load data: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

# Aliases for the loaded data under the usual estimator argument names.
x_train = df_traindata_caf
y_train = df_traindata_lab
x_test = df_testdata_caf

# Train a random forest with 500 trees on the train features and labels.
rf = RandomForestClassifier(n_estimators=500)
rf.fit(x_train, y_train)

# Report how long training took, then restart the timer for the next stage.
print("--- train model: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

# Predict