def writePathToSamples(parentDir):
    '''Write the paths to the samples of each label.

    One .txt file is generated per block and stored at
    projectPath/<basename of parentDir>/block_<number>.txt.
    Each .txt file lists the paths to the samples of that block for all users.'''
    userlist = utility.getSubdirectories(parentDir)
    print userlist
    baseName = os.path.basename(parentDir)
    data_dir = os.path.join(projectPath, baseName)
    utility.checkDirectory(data_dir)
    # blocks = ["{:02d}".format(x) for x in range(1,10)]
    for i in range(1, 10):
        with open(os.path.join(data_dir, 'block_' + str(i) + '.txt'), 'w') as f:
            for user in userlist:
                blockPath = os.path.join(parentDir, user, 'block_' + str(i))
                if not os.path.isdir(blockPath):
                    continue
                for img in os.listdir(blockPath):
                    img_path = os.path.join(blockPath, img)
                    # Keep only existing .jpg/.png files.
                    if not os.path.isfile(img_path) or img_path.split('.')[-1] not in ('jpg', 'png'):
                        continue
                    f.write("%s\n" % img_path)
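# Hedged sketch, not part of the original project: a small helper showing how the
# block_<n>.txt lists written above could be read back for downstream feature
# extraction. The name readPathsForBlock and its arguments are hypothetical; it
# assumes the module-level projectPath used by writePathToSamples.
def readPathsForBlock(baseName, blockNumber):
    '''Return the list of sample paths stored in
    projectPath/<baseName>/block_<blockNumber>.txt.'''
    listFile = os.path.join(projectPath, baseName, 'block_' + str(blockNumber) + '.txt')
    with open(listFile, 'r') as f:
        return [line.strip() for line in f if line.strip()]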
def classify_svm(feature, labels, model='left'):
    '''Grid-search an SVM over C, gamma and kernel, report the results and
    pickle the fitted model to projectPath/<model>/<model>_svm.pkl.'''
    print "---------SVM Classifier-------------"
    modelDir = os.path.join(projectPath, model)
    utility.checkDirectory(modelDir)
    dataTrain, dataTest, labelsTrain, labelsTest = train_test_split(
        feature, labels, test_size=0.20, random_state=42)
    param_grid = [
        {'C': [1, 10, 100, 1000], 'gamma': [1, 0.1, 0.001, 0.0001], 'kernel': ['linear']},
        {'C': [1, 10, 100, 1000], 'gamma': [1, 0.1, 0.001, 0.0001], 'kernel': ['rbf']},
    ]
    svc = svm.SVC()
    clf = grid_search.GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, n_jobs=-2)
    print "Training SVM classifier over a grid of C and gamma values to select the best parameters\n"
    start = time.time()
    clf.fit(dataTrain, labelsTrain)
    end = time.time()
    elapsed = end - start
    print("Time taken : %f seconds" % elapsed)
    print("Best parameters set found on development set:")
    print("")
    print(clf.best_estimator_)
    print("")
    print("Grid scores on development set:")
    print("")
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() / 2, params))
    print("")
    print("Detailed classification report:")
    print("")
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print("")
    y_true, y_pred = labelsTest, clf.predict(dataTest)
    print(classification_report(y_true, y_pred))
    print("")
    # Persist the fitted grid-search object for later prediction.
    with open(os.path.join(modelDir, model + '_svm.pkl'), 'wb') as fid:
        cPickle.dump(clf, fid)
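# Hedged sketch, not part of the original project: reload the SVM model pickled by
# classify_svm and use it for prediction. The name loadSvmAndPredict is hypothetical,
# and newFeatures stands in for feature vectors prepared the same way as the
# training data; the sketch assumes the module-level projectPath and cPickle import.
def loadSvmAndPredict(newFeatures, model='left'):
    '''Reload projectPath/<model>/<model>_svm.pkl and return its predictions.'''
    modelPath = os.path.join(projectPath, model, model + '_svm.pkl')
    with open(modelPath, 'rb') as fid:
        clf = cPickle.load(fid)
    return clf.predict(newFeatures)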
def classify_rfc(feature, labels, model='left'):
    '''Grid-search a random forest over n_estimators and max_features, report the
    results and pickle the fitted model to projectPath/<model>/<model>_5_rfc.pkl.'''
    print "---------Random Forest Classifier-------------"
    modelDir = os.path.join(projectPath, model)
    utility.checkDirectory(modelDir)
    dataTrain, dataTest, labelsTrain, labelsTest = train_test_split(
        feature, labels, test_size=0.20, random_state=42)
    param_grid = [
        {'n_estimators': [1, 10, 100, 1000], 'max_features': [10, 50, 100, 400]},
    ]
    rfc = RandomForestClassifier(n_estimators=10)
    clf = grid_search.GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, n_jobs=-2)
    print "Classification for " + model + " eye\n"
    print "Training RFC classifier over a grid of n_estimators and max_features values to select the best parameters\n"
    start = time.time()
    clf.fit(dataTrain, labelsTrain)
    end = time.time()
    elapsed = end - start
    print("Time taken : %f seconds" % elapsed)
    print("Best parameters set found on development set:")
    print("")
    print(clf.best_estimator_)
    print("")
    print("Grid scores on development set:")
    print("")
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() / 2, params))
    print("")
    print("Detailed classification report:")
    print("")
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print("")
    y_true, y_pred = labelsTest, clf.predict(dataTest)
    print(classification_report(y_true, y_pred))
    print("")
    # Persist the fitted grid-search object for later prediction.
    with open(os.path.join(modelDir, model + '_5_rfc.pkl'), 'wb') as fid:
        cPickle.dump(clf, fid)
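# Example driver (a hedged sketch, not taken from the original project): exercise both
# classifiers on small synthetic data. In the real pipeline `feature` would hold
# vectors extracted from the images listed in block_<n>.txt and `labels` the
# corresponding block numbers; the sizes below are arbitrary placeholders.
if __name__ == '__main__':
    import numpy as np
    feature = np.random.rand(200, 400)            # 200 samples, 400-dim feature vectors
    labels = np.random.randint(1, 10, size=200)   # block labels 1..9
    classify_svm(feature, labels, model='left')   # writes projectPath/left/left_svm.pkl
    classify_rfc(feature, labels, model='left')   # writes projectPath/left/left_5_rfc.pkl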