def crossval():
    """
    Perform a crossvalidation on the data (beh + precomputed brain) of all
    subjects.

    For every split produced by ``cv.LeaveOneOut(subject_num)``:

    * cluster labels are read from the ``analysis_lo_XX_YY/thresh_h01_f05``
      directories below ``spmdir`` that pair the held-out subject's row
      index with every other subject index,
    * cluster means of the training / test contrast files are appended to
      the matching rows of ``desmat``,
    * ``determine_model_all`` picks the variables and fits the model, and
    * the held-out response is predicted from the selected columns.

    This function takes no arguments; it reads its inputs from names defined
    outside of it (``confiles``, ``pdata``, ``desmat``, ``responsevar``,
    ``behvars``, ``subject_num``, ``spmdir``, ``outdir``, ``imgshape``,
    ``imgaff``, ``imghead``).

    Side effects
    ------------
    Writes ``cluster_disj_crossval.png`` (via ``brainplot``) and
    ``clusterdisj_crossval.nii`` into ``outdir``.

    Returns
    -------
    predscores_alpha : ndarray
        Predictions of all folds, flattened into one array.
    actualscores : ndarray
        First held-out response value of every fold.
    meanerr : float
        Mean absolute prediction error.
    rmsqerr : float
        Root mean squared prediction error.
    """
    predscores = []
    actualscores = []
    # counts, per voxel, in how many folds a *selected* cluster covered it
    clust_disj = np.zeros(imgshape)

    for trainidx, testidx in cv.LeaveOneOut(subject_num):
        # trainidx / testidx are indexed per subject position and used for
        # their truth value, i.e. they are treated as masks here
        # n-p training files
        trainconfiles = [cf for i, cf in enumerate(confiles) if trainidx[i]]
        # left out subjects to test with
        testconfiles = [cf for i, cf in enumerate(confiles) if testidx[i]]

        ### get all the files from a leave2out crossval and get clusters
        _, name = os.path.split(testconfiles[0])
        # subject id = file name text before the first 'con', minus its last
        # character (presumably a separator -- confirm against the file names)
        sid = name.split('con')[0][:-1]
        # sidx is the row# of the sid in our pdata variable
        sidx = np.nonzero(pdata.subject == sid)[0][0]

        analysisdirs = []
        for idx in range(subject_num):
            if idx != sidx:
                # directory names carry the two indices in ascending order
                first, second = sorted([sidx, idx])
                analysisdirs.append(
                    os.path.join(spmdir,
                                 'analysis_lo_%02d_%02d' % (first, second),
                                 'thresh_h01_f05'))

        # get labels and clustermeans
        labels, nlabels = get_labels(analysisdirs)
        clustermeans_train = get_clustermeans(labels, nlabels, trainconfiles)
        clustermeans_test = get_clustermeans(labels, nlabels, testconfiles)

        # make desmats (desmat columns first, then the cluster means)
        X_train = np.hstack((desmat[trainidx], clustermeans_train))
        X_test = np.hstack((desmat[testidx], clustermeans_test))

        # fit the model (by determining the best model first)
        varsidx, model = determine_model_all(X_train, responsevar[trainidx])

        # save location of _selected_ clusters; cluster columns are looked up
        # at offset `behvars` in varsidx and cluster labels are 1-based
        for clust in range(nlabels):
            if varsidx[behvars + clust]:
                clust_disj[np.where(labels == clust + 1)] += 1

        # and save scores
        predscores.append(model.predict(X_test[:, varsidx]))
        actualscores.append(responsevar[testidx][0])

    # rearrange vectors for error computation: flatten the per-fold
    # predictions into one array (same element order as the fold order)
    actualscores = np.array(actualscores)
    predscores_alpha = np.array(
        [score for fold_scores in predscores for score in fold_scores])

    # compute errors
    prederrors = predscores_alpha - actualscores
    meanerr = np.mean(np.abs(prederrors))
    rmsqerr = np.sqrt(np.mean(prederrors ** 2))

    # save + plot cluster distribution in brain
    brainplot(clust_disj, os.path.join(outdir, "cluster_disj_crossval.png"))
    outimg = os.path.join(outdir, 'clusterdisj_crossval.nii')
    nib.Nifti1Image(clust_disj, imgaff, imghead).to_filename(outimg)

    return predscores_alpha, actualscores, meanerr, rmsqerr
def predict(self, X):
    """
    Predict from the fitted linear regression model.

    Parameters
    ----------
    X : test samples used for prediction, passed on unchanged to
        ``get_clustermeans`` and ``get_subject_data``.

    Returns
    -------
    The output of ``self.model_.predict`` on the selected feature columns.

    Raises
    ------
    Exception
        With message 'no model' when ``self.model_`` is None.
    """
    # nothing to predict with
    if self.model_ is None:
        raise Exception('no model')

    # get the confiles + clustermeans of the given subjects
    clustermeans = get_clustermeans(X, self.labels_, self.nlabels_)
    _, pdata = get_subject_data(X)

    # behavioural columns: lsas_pre and classtype shifted by -2
    lsas_pre_col = pdata.lsas_pre[:, None]
    classtype_col = pdata.classtype[:, None] - 2
    features = np.hstack((lsas_pre_col, classtype_col))

    # make new matrix (first behvars, then clustermeans)
    X_new = np.hstack((features, clustermeans))

    # keep only the columns recorded in self.varidx_
    selected = X_new[:, self.varidx_]
    return self.model_.predict(selected)
def _fit(X, y, behav_data=None):
    """
    Run the SPM analysis, derive cluster means and fit the best model.

    Parameters
    ----------
    X : samples handed to ``setup_spm`` and ``get_clustermeans``.
    y : response values handed to ``setup_spm`` and ``determine_model_all``.
    behav_data : 2-D array or None, optional
        Behavioural columns placed in front of the cluster means. When
        given, its column count is passed to ``determine_model_all``;
        otherwise 0 is passed and only the cluster means are used.

    Returns
    -------
    model, varidx, labels, nlabels
        ``model`` and ``varidx`` come from ``determine_model_all``;
        ``labels`` and ``nlabels`` come from ``get_labels``.

    Side effects
    ------------
    Prints progress information and removes the parent directory of the
    directory returned by ``setup_spm``.
    """
    # run the SPM analysis (external workflow) in a LOO crossval
    # and save the directories in which the threshold images are located.
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("Fitting")
    print(X)
    print(y)
    print("doing spm cv")
    analdir = setup_spm(X, y)

    # get labels & clustermeans
    labels, nlabels = get_labels(analdir)

    # delete all the workflow directories again (labels are already loaded)
    shutil.rmtree(os.path.realpath(os.path.join(analdir, '..')))
    clustermeans = get_clustermeans(X, labels, nlabels)

    # make new design matrix (first behvars, then clustermeans)
    if behav_data is not None:
        X_new = np.hstack((behav_data, clustermeans))
        n_behav = behav_data.shape[1]
    else:
        X_new = clustermeans
        n_behav = 0

    varidx, model = determine_model_all(X_new, y, n_behav)
    return model, varidx, labels, nlabels
# Label clusters in the uncorrected map: voxels above the t quantile at
# p = 0.001 with 33 degrees of freedom (second get_labels argument is 20 --
# presumably a minimum cluster size, confirm against get_labels).
t_cutoff = ss.t.ppf(1-0.001, 33)
labels, nlabels = get_labels(img.get_data() > t_cutoff, 20)

# zero every voxel that did not end up in a labelled cluster
# NOTE(review): this only affects `img` if get_data() hands back the
# image's own array -- confirm
data = img.get_data()
data[labels == 0] = 0
# cluster means are not computed for the uncorrected map:
#cmeans = get_clustermeans(X, labels, nlabels)
coords = get_coords(labels, img.get_affine())
show_slices(img, coords, threshold=0.5, prefix='uncorrected',
            show_colorbar=True)

# <codecell>

import os
from scipy.ndimage import label

# Thresholded map of the all-subjects analysis: every non-zero voxel is
# part of a cluster, connected components give the cluster labels.
base_dir = '/mindhive/gablab/satra/sad/'
filename = os.path.join(base_dir, 'all_subjects', 'thresh',
                        'spmT_0001_thr.img')
img = load(filename)
nonzero_mask = abs(img.get_data()) > 0
labels, nlabels = label(nonzero_mask)
cmeans = get_clustermeans(X, labels, nlabels)
coords = get_coords(labels, img.get_affine())
show_slices(img, coords, prefix='topocorrect', show_colorbar=True)

# <codecell>

# Scatter the first two cluster-mean columns against y, then overlay a
# regression line per cluster in the matching gray level.
close('all')
axes([0.1, 0.1, 0.7, 0.8])
cluster_grays = ([0.2, 0.2, 0.2], [0.6, 0.6, 0.6])
for column, gray in enumerate(cluster_grays):
    plot(y, cmeans[:, column], 'o', color=gray)
xlim([-5, 84])
xlabel('LSAS Delta')
ylabel('contrast activation')
legend(('Cluster 1', 'Cluster 2'), 'best', numpoints=1)
for column, gray in enumerate(cluster_grays):
    plot_regression_line(y, cmeans[:, column], [-4, 85], color=gray)