def crossval():
    """Cross-validate the combined behavioural + brain model over all subjects.

    For every fold produced by ``cv.LeaveOneOut(subject_num)``:

    1. the contrast files are split into training and test files,
    2. cluster labels are read (``get_labels``) from every analysis
       directory whose name pairs the held-out subject with one other
       subject,
    3. cluster means for training and test files are appended to the right
       of the corresponding rows of ``desmat``,
    4. a model is selected on the training data with
       ``determine_model_all`` and used to predict the held-out data,
    5. the voxels of every selected cluster are counted in an
       accumulator volume.

    The function takes no arguments; it reads the module-level names
    ``imgshape``, ``subject_num``, ``confiles``, ``pdata``, ``spmdir``,
    ``desmat``, ``responsevar``, ``behvars``, ``outdir``, ``imgaff`` and
    ``imghead``.

    Side effects: writes ``cluster_disj_crossval.png`` (via ``brainplot``)
    and ``clusterdisj_crossval.nii`` into ``outdir``.

    Returns:
        tuple ``(predscores_alpha, actualscores, meanerr, rmsqerr)`` where
        the first two are arrays of predicted and actual scores, ``meanerr``
        is the mean absolute prediction error and ``rmsqerr`` the root mean
        squared prediction error.
    """
    predscores = []
    actualscores = []
    # counts, per voxel, in how many folds a selected cluster covered it
    clust_disj = np.zeros(imgshape)

    for trainidx, testidx in cv.LeaveOneOut(subject_num):
        # trainidx/testidx are looked up per subject position, i.e. used as
        # boolean masks -- assumes cv.LeaveOneOut yields masks rather than
        # integer index arrays; TODO confirm against the installed version.
        trainconfiles = [cf for i, cf in enumerate(confiles) if trainidx[i]]
        testconfiles = [cf for i, cf in enumerate(confiles) if testidx[i]]

        # subject id = text before the first 'con' in the test file name,
        # minus its last character (presumably a separator -- confirm)
        name = os.path.basename(testconfiles[0])
        sid = name.split('con')[0][:-1]
        # sidx is the row number of that subject in pdata
        sidx = np.nonzero(pdata.subject == sid)[0][0]

        # one analysis directory per pairing of the held-out subject with
        # another subject; the directory name carries the smaller index first
        analysisdirs = []
        for idx in range(subject_num):
            if idx == sidx:
                continue
            first, second = sorted((sidx, idx))
            analysisdirs.append(os.path.join(
                spmdir,
                'analysis_lo_%02d_%02d' % (first, second),
                'thresh_h01_f05'))

        # labels and cluster means
        labels, nlabels = get_labels(analysisdirs)
        clustermeans_train = get_clustermeans(labels, nlabels, trainconfiles)
        clustermeans_test = get_clustermeans(labels, nlabels, testconfiles)

        # design matrices: columns of desmat first, then cluster means
        X_train = np.hstack((desmat[trainidx], clustermeans_train))
        X_test = np.hstack((desmat[testidx], clustermeans_test))

        # fit the model (the best model is determined first)
        varsidx, model = determine_model_all(X_train, responsevar[trainidx])

        # remember the location of the _selected_ clusters; cluster columns
        # start after the first `behvars` columns and cluster k is stored in
        # `labels` as the value k + 1
        for clust in range(nlabels):
            if varsidx[behvars + clust]:
                idx = np.where(labels == clust + 1)
                clust_disj[idx] += 1

        # store prediction and true score of this fold
        predscores.append(model.predict(X_test[:, varsidx]))
        actualscores.append(responsevar[testidx][0])

    # flatten the per-fold predictions into one vector for error computation
    actualscores = np.array(actualscores)
    predscores_alpha = np.array(
        [score for prediction in predscores for score in prediction])

    # compute errors
    prederrors = predscores_alpha - actualscores
    meanerr = np.mean(np.abs(prederrors))
    rmsqerr = np.sqrt(np.mean(prederrors ** 2))

    # save + plot cluster distribution in brain
    brainplot(clust_disj, os.path.join(outdir, "cluster_disj_crossval.png"))
    outimg = os.path.join(outdir, 'clusterdisj_crossval.nii')
    nib.Nifti1Image(clust_disj, imgaff, imghead).to_filename(outimg)

    return predscores_alpha, actualscores, meanerr, rmsqerr
def _fit(X, y, behav_data=None):
    """Fit a model on cluster means, optionally preceded by behavioural data.

    Runs the external SPM workflow through ``setup_spm(X, y)``, reads the
    cluster labels from the resulting analysis directory, removes the
    parent directory of that analysis directory, computes the cluster
    means and hands the design matrix to ``determine_model_all``.

    Args:
        X: passed unchanged to ``setup_spm`` and ``get_clustermeans``.
        y: response values, passed to ``setup_spm`` and
            ``determine_model_all``.
        behav_data: optional 2-D array of behavioural variables. When
            given, its columns are placed in front of the cluster means
            and its column count is passed to ``determine_model_all``;
            otherwise 0 is passed.

    Returns:
        tuple ``(model, varidx, labels, nlabels)``.
    """
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("Fitting")
    print(X)
    print(y)
    print("doing spm cv")

    # run the SPM analysis (external workflow) in a LOO crossval
    analdir = setup_spm(X, y)

    # get labels before the workflow output is removed
    labels, nlabels = get_labels(analdir)

    # delete all the workflow directories again (the parent of analdir)
    shutil.rmtree(os.path.realpath(os.path.join(analdir, '..')))

    # NOTE(review): crossval() in this file calls
    # get_clustermeans(labels, nlabels, files) and determine_model_all with
    # two arguments, whereas this function uses a different order/arity --
    # confirm which signatures the helpers actually have.
    clustermeans = get_clustermeans(X, labels, nlabels)

    # make new design matrix (first behvars, then clustermeans)
    if behav_data is None:
        X_new = clustermeans
        n_behav = 0
    else:
        X_new = np.hstack((behav_data, clustermeans))
        n_behav = behav_data.shape[1]

    varidx, model = determine_model_all(X_new, y, n_behav)
    return model, varidx, labels, nlabels