예제 #1
0
def do_Lasso_Kfold(y,yname,files,X):
    """Run a 4-fold cross-validation of PCA followed by a Lasso fit.

    For every fold the rows of the loaded data selected by ``train`` are
    passed to ``do_PCA``; the reduced training data is stacked column-wise
    with ``X[train]`` to form the design matrix, a model is fitted with
    ``do_LASSO`` and the rows selected by ``test`` are predicted.

    Parameters
    ----------
    y : array-like with ``.shape``
        Target values; ``y.shape[0]`` is taken as the number of subjects.
    yname :
        Passed through unchanged to ``Outputsclass``.
    files :
        Passed to ``load_data`` to obtain the data matrix.
    X : array-like
        Extra covariates, indexed with the same train/test selectors as the
        loaded data and appended to the PCA-reduced columns.

    Returns
    -------
    The ``Outputsclass`` instance holding, per fold, the PCA and Lasso
    objects and, per test row, the training R^2, adjusted R^2, prediction
    and prediction error.
    """
    subject_num = y.shape[0]
    output = Outputsclass(subject_num,y,yname)
    output.prepare_kfold()
    data, idx, img  = load_data(files)

    # The label vector handed to the splitter is all zeros, i.e. one class.
    for train, test in cv.StratifiedKFold(np.zeros(subject_num), k = 4):
        # do_PCA only sees the training rows; the returned object is then
        # used to transform the held-out rows.
        pca, data_red = do_PCA(data[train])
        output.pca.append(pca)
        data_red_test = pca.transform(data[test])

        # Build design matrix & test vector
        desmat_cv = np.array(np.hstack((data_red, X[train])))
        test_vec = np.array(np.hstack((data_red_test, X[test])))
        y_cv = y[train]

        # Lasso
        lasso = do_LASSO(y_cv, desmat_cv)
        output.lasso.append(lasso)
        # The score is computed on the training data of this fold.
        output.rsq[test] = lasso.score(desmat_cv, y_cv)

        # Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1), where n must
        # be the number of rows the model was fitted on in this fold (not a
        # value derived from the total subject count) and p the number of
        # coefficients.
        n_train = y_cv.shape[0]
        n_coef = lasso.coef_.shape[0]
        output.adjrsq[test] = 1 - (1 - output.rsq[test]) * (n_train - 1) / (n_train - n_coef - 1)

        # Prediction
        output.prediction[test] = lasso.predict(test_vec)
        output.pred_errors[test] = y[test] - output.prediction[test]
        # Single-argument print: same text under Python 2 and Python 3.
        print(" ".join(["did prediction, error = ", str(output.pred_errors[test])]))
        output.append_kfold(train,test)

    return output
예제 #2
0
def do_R_Crossval(y,yname,files,X,gr = 0):
    """Leave-one-out cross-validation of an R regression on PCA-reduced data.

    For every left-out subject the remaining rows are passed to ``do_PCA``,
    stacked column-wise with ``X[train]`` and handed to ``do_Regression``.
    The fitted model's R^2 and adjusted R^2 are read from its R summary and
    the left-out row is predicted through ``stats.predict``.

    Parameters
    ----------
    y : array-like with ``.shape``
        Target values; ``y.shape[0]`` is taken as the number of subjects.
    yname :
        Passed through unchanged to ``Outputsclass``.
    files :
        Passed to ``load_data`` to obtain the data matrix.
    X : array-like
        Extra covariates appended to the PCA-reduced columns.
    gr : optional
        Group vector. It is only used when it is an instance of ``type(y)``;
        the default ``0`` means "no grouping", in which case ``0`` is passed
        to ``do_Regression`` and no ``"group"`` column is added to the
        prediction data frame.

    Returns
    -------
    The populated ``Outputsclass`` instance.
    """
    # Imported at call time, as in the original implementation.
    from LassoPCR_test import load_data
    from LassoPCR_test import do_PCA
    subject_num = y.shape[0]
    output = Outputsclass(subject_num,y,yname)
    data, idx, img  = load_data(files)

    # Neither gr nor y changes inside the loop, so decide once.
    has_group = isinstance(gr, type(y))

    for train, test in cv.LeaveOneOut(subject_num):
        pca, data_red = do_PCA(data[train])
        output.pca.append(pca)
        data_red_test = pca.transform(data[test])

        desmat_cv = np.array(np.hstack((data_red, X[train])))
        test_vec = np.array(np.hstack((data_red_test, X[test])))
        y_cv = y[train]
        gr_cv = gr[train] if has_group else 0

        mult_lm = do_Regression(y_cv,desmat_cv,gr_cv)
        output.mult_lm.append(mult_lm)
        # Summarise the fit once and read both statistics from it.
        fit_summary = base.summary(mult_lm)
        output.rsq[test] = np.array(fit_summary.rx("r.squared")[0])
        output.adjrsq[test] = np.array(fit_summary.rx("adj.r.squared")[0])

        # Prediction: one named column ("roi00", "roi01", ...) per entry of
        # the first row of the test vector.
        predd = dict(("roi%02d" % i, value) for i, value in enumerate(test_vec[0]))
        if has_group:
            predd["group"] = FV([gr[test]])

        preddataf = rob.DataFrame(predd)
        output.prediction[test] = stats.predict(mult_lm, preddataf)[0]
        output.pred_errors[test] = y[test] - output.prediction[test]
    return output