Example #1
0
def crossval():
    """
    Perform a leave-one-out crossvalidation on the data (behavioural +
    precomputed brain) of all subjects.

    For every fold the clusters are taken from the precomputed analysis
    directories in which the test subject was left out, cluster means are
    appended to the behavioural design matrix, the best model is determined
    on the training subjects and used to predict the left-out subject.

    Relies on names defined outside this function: ``cv``, ``subject_num``,
    ``confiles``, ``pdata``, ``spmdir``, ``desmat``, ``responsevar``,
    ``behvars``, ``imgshape``, ``imgaff``, ``imghead``, ``outdir`` and the
    helpers ``get_labels``, ``get_clustermeans``, ``determine_model_all``
    and ``brainplot``.

    Side effects: writes ``cluster_disj_crossval.png`` (via ``brainplot``)
    and ``clusterdisj_crossval.nii`` into ``outdir``.

    Returns a tuple ``(predscores_alpha, actualscores, meanerr, rmsqerr)``:
    the flattened predictions, the actual scores, the mean absolute error
    and the root mean squared error.
    """
    predscores = []
    actualscores = []
    # counts, per voxel, in how many folds a *selected* cluster covered it
    clust_disj = np.zeros(imgshape)
    for trainidx, testidx in cv.LeaveOneOut(subject_num):
        # NOTE(review): trainidx/testidx are indexed per subject below, so
        # they are presumably boolean masks over the subjects -- confirm
        # against the cross-validation iterator in use.
        # n-p training files
        trainconfiles = [cf for i, cf in enumerate(confiles) if trainidx[i]]
        # left out subjects to test with
        testconfiles = [cf for i, cf in enumerate(confiles) if testidx[i]]
        # the subject id is the part of the file name before 'con',
        # minus the one separator character preceding it
        _, name = os.path.split(testconfiles[0])
        sid = name.split('con')[0][:-1]
        # sidx is the row# of the sid in our pdata variable
        sidx = np.nonzero(pdata.subject == sid)[0][0]
        # collect the leave-two-out analysis directories in which the test
        # subject was one of the two left-out subjects
        analysisdirs = []
        for other in range(subject_num):
            if other == sidx:
                continue
            first, second = sorted((sidx, other))
            analysisdirs.append(
                os.path.join(spmdir,
                             'analysis_lo_%02d_%02d' % (first, second),
                             'thresh_h01_f05'))
        # get labels and clustermeans
        labels, nlabels = get_labels(analysisdirs)
        clustermeans_train = get_clustermeans(labels, nlabels, trainconfiles)
        clustermeans_test = get_clustermeans(labels, nlabels, testconfiles)
        # make desmats: behavioural columns first, then the cluster means
        X_train = np.hstack((desmat[trainidx], clustermeans_train))
        X_test = np.hstack((desmat[testidx], clustermeans_test))
        # fit the model (by determining the best model first)
        varsidx, model = determine_model_all(X_train, responsevar[trainidx])
        # save location of _selected_ clusters; cluster columns start after
        # the first `behvars` columns and cluster labels are 1-based
        for clust in range(nlabels):
            if varsidx[behvars + clust]:
                voxels = np.where(labels == clust + 1)
                clust_disj[voxels] += 1
        # and save scores
        predscores.append(model.predict(X_test[:, varsidx]))
        actualscores.append(responsevar[testidx][0])
    # rearrange vectors for error computation: flatten the per-fold
    # predictions into one vector
    actualscores = np.array(actualscores)
    predscores_alpha = np.array(
        [score for fold_pred in predscores for score in fold_pred])
    # compute errors
    prederrors = predscores_alpha - actualscores
    meanerr = np.mean(np.abs(prederrors))
    rmsqerr = np.sqrt(np.mean(prederrors ** 2))
    # save + plot cluster distribution in brain
    brainplot(clust_disj, os.path.join(outdir, "cluster_disj_crossval.png"))
    outimg = os.path.join(outdir, 'clusterdisj_crossval.nii')
    nib.Nifti1Image(clust_disj, imgaff, imghead).to_filename(outimg)
    return predscores_alpha, actualscores, meanerr, rmsqerr
Example #2
0
def _fit(X, y, behav_data=None):
    """
    Run the SPM analysis, extract cluster means and fit the best model.

    The SPM analysis is an external workflow started through ``setup_spm``;
    its working directories are removed again once the cluster labels have
    been read.

    Parameters:
        X: input passed to ``setup_spm`` and ``get_clustermeans``.
        y: response values passed to ``setup_spm`` and
            ``determine_model_all``.
        behav_data: optional 2-D array of behavioural variables; when given
            its columns are placed in front of the cluster means in the
            design matrix.

    Returns a tuple ``(model, varidx, labels, nlabels)``.
    """
    # single-argument parenthesised prints give the same output on
    # Python 2 and are valid syntax on Python 3
    print("Fitting")
    print(X)
    print(y)
    print("doing spm cv")
    analdir = setup_spm(X, y)
    # get labels & clustermeans
    labels, nlabels = get_labels(analdir)
    # delete all the workflow directories again (the parent of analdir)
    shutil.rmtree(os.path.realpath(os.path.join(analdir, '..')))
    clustermeans = get_clustermeans(X, labels, nlabels)
    # make new design matrix (first behvars, then clustermeans)
    if behav_data is not None:
        X_new = np.hstack((behav_data, clustermeans))
        n_behav = behav_data.shape[1]
    else:
        X_new = clustermeans
        n_behav = 0
    # the third argument is the number of leading behavioural columns
    varidx, model = determine_model_all(X_new, y, n_behav)
    return model, varidx, labels, nlabels