Example #1
from __future__ import print_function

import os

# utility is a project-local helper module; projectPath is assumed to be a
# module-level constant pointing at the project's output directory.
import utility


def writePathToSamples(parentDir):
    '''Write the paths to the samples of each label. One .txt file is
    generated per block and stored at
    projectPath/<basename of parentDir>/block_<number>.txt.
    Each .txt file lists the paths to that block's samples for all users.'''
    userlist = utility.getSubdirectories(parentDir)
    print(userlist)

    baseName = os.path.basename(parentDir)
    data_dir = os.path.join(projectPath, baseName)
    utility.checkDirectory(data_dir)
    for i in range(1, 10):
        # One list file per block, covering every user's samples.
        with open(os.path.join(data_dir, 'block_' + str(i) + '.txt'), 'w') as f:
            for user in userlist:
                blockPath = os.path.join(parentDir, user, 'block_' + str(i))
                if not os.path.isdir(blockPath):
                    continue
                for img in os.listdir(blockPath):
                    img_path = os.path.join(blockPath, img)
                    # Keep only regular .jpg/.png files.
                    if not os.path.isfile(img_path) or \
                            img_path.split('.')[-1] not in ('jpg', 'png'):
                        continue
                    f.write("%s\n" % img_path)
Example #2
from __future__ import print_function

import os
import time
import cPickle

# Written against the pre-0.18 scikit-learn API (grid_search,
# cross_validation and grid_scores_ were removed in later releases).
from sklearn import svm, grid_search
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report

# utility is a project-local helper module; projectPath is a module-level constant.
import utility


def classify_svm(feature, labels, model='left'):
    print("---------SVM Classifier-------------")
    modelDir = os.path.join(projectPath, model)
    utility.checkDirectory(modelDir)
    # Hold out 20% of the data for the final evaluation report.
    dataTrain, dataTest, labelsTrain, labelsTest = train_test_split(
        feature, labels, test_size=0.20, random_state=42)
    param_grid = [
        {
            # gamma is ignored by the linear kernel, so only C is searched here.
            'C': [1, 10, 100, 1000],
            'kernel': ['linear']
        },
        {
            'C': [1, 10, 100, 1000],
            'gamma': [1, 0.1, 0.001, 0.0001],
            'kernel': ['rbf']
        },
    ]
    svc = svm.SVC()
    # 5-fold cross-validated grid search, using all but one CPU core.
    clf = grid_search.GridSearchCV(estimator=svc,
                                   param_grid=param_grid,
                                   cv=5,
                                   n_jobs=-2)
    print("Training the SVM classifier over a grid of C and gamma values to select the best parameters\n")
    start = time.time()
    clf.fit(dataTrain, labelsTrain)
    end = time.time()
    elapsed = end - start
    print("Time taken: %f seconds" % elapsed)
    print("Best parameters set found on development set:")
    print()
    print(clf.best_estimator_)
    print()
    print("Grid scores on development set:")
    print()
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r" %
              (mean_score, scores.std() / 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = labelsTest, clf.predict(dataTest)
    print(classification_report(y_true, y_pred))
    print()
    # Persist the fitted grid-search object for later prediction.
    with open(os.path.join(modelDir, model + '_svm.pkl'), 'wb') as fid:
        cPickle.dump(clf, fid)
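A minimal usage sketch on synthetic data; numpy is used only to fabricate stand-in features, and the nine-class labels merely mimic the nine blocks from Example #1:

import numpy as np

# Synthetic stand-in for the real descriptors: 200 samples, 400 features,
# with labels 1..9 standing in for the nine blocks.
features = np.random.rand(200, 400)
gaze_labels = np.random.randint(1, 10, size=200)

# Runs the grid search and writes projectPath/left/left_svm.pkl.
classify_svm(features, gaze_labels, model='left')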
Example #3
from __future__ import print_function

import os
import time
import cPickle

# Written against the pre-0.18 scikit-learn API (grid_search,
# cross_validation and grid_scores_ were removed in later releases).
from sklearn import grid_search
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# utility is a project-local helper module; projectPath is a module-level constant.
import utility


def classify_rfc(feature, labels, model='left'):
    print("---------Random Forest Classifier-------------")

    modelDir = os.path.join(projectPath, model)
    utility.checkDirectory(modelDir)
    # Hold out 20% of the data for the final evaluation report.
    dataTrain, dataTest, labelsTrain, labelsTest = train_test_split(
        feature, labels, test_size=0.20, random_state=42)
    param_grid = [
        {'n_estimators': [1, 10, 100, 1000], 'max_features': [10, 50, 100, 400]},
    ]
    rfc = RandomForestClassifier(n_estimators=10)
    # 5-fold cross-validated grid search, using all but one CPU core.
    clf = grid_search.GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, n_jobs=-2)
    print("Classification for " + model + " eye\n")
    print("Training the RFC classifier over a grid of n_estimators and max_features values to select the best parameters\n")
    start = time.time()
    clf.fit(dataTrain, labelsTrain)
    end = time.time()
    elapsed = end - start
    print("Time taken: %f seconds" % elapsed)
    print("Best parameters set found on development set:")
    print()
    print(clf.best_estimator_)
    print()
    print("Grid scores on development set:")
    print()
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, scores.std() / 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = labelsTest, clf.predict(dataTest)
    print(classification_report(y_true, y_pred))
    print()
    # Persist the fitted grid-search object for later prediction.
    with open(os.path.join(modelDir, model + '_5_rfc.pkl'), 'wb') as fid:
        cPickle.dump(clf, fid)
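The same kind of synthetic-data sketch applies here; features and gaze_labels are again hypothetical stand-ins for the real descriptor matrix and its labels:

import numpy as np

# Stand-in data: 200 samples, 400 features, labels 1..9.
features = np.random.rand(200, 400)
gaze_labels = np.random.randint(1, 10, size=200)

# Runs the random-forest grid search and writes
# projectPath/right/right_5_rfc.pkl.
classify_rfc(features, gaze_labels, model='right')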